/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.coordination;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.CoordinatedStateManager;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.executor.EventType;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.RegionServerServices;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;

import java.io.IOException;

/**
 * ZK-based implementation of {@link OpenRegionCoordination}.
 *
 * <p>Region-open progress is recorded by moving the region's znode through
 * OFFLINE, OPENING and OPENED (or FAILED_OPEN) states. The znode version
 * carried in {@link ZkOpenRegionDetails} is passed to every transition; a
 * version of {@code -1} is used throughout this class to mean "no valid
 * version / previous transition failed".
 */
@InterfaceAudience.Private
public class ZkOpenRegionCoordination implements OpenRegionCoordination {
  private static final Log LOG = LogFactory.getLog(ZkOpenRegionCoordination.class);

  // Both collaborators are assigned once in the constructor and never replaced.
  private final CoordinatedStateManager coordination;
  private final ZooKeeperWatcher watcher;

  /**
   * @param coordination state manager used to reach the hosting server (for its
   *                     server name, ZooKeeper handle and abort hook)
   * @param watcher ZooKeeper watcher used for the region-server-side znode transitions
   */
  public ZkOpenRegionCoordination(CoordinatedStateManager coordination,
                                  ZooKeeperWatcher watcher) {
    this.coordination = coordination;
    this.watcher = watcher;
  }

  //-------------------------------
  // Region Server-side operations
  //-------------------------------

  /**
   * Transition the region's znode to OPENED.
   *
   * <p>If the transition reports {@code -1}, the znode is checked: the server is
   * aborted when the znode no longer exists (or the check itself fails), otherwise
   * a warning is logged. A {@link KeeperException} thrown by the transition itself
   * is logged and results in {@code false}.
   *
   * @param r Region we're working on.
   * @param ord details of the open task; must be a {@link ZkOpenRegionDetails}
   * @return whether znode is successfully transitioned to OPENED state.
   * @throws java.io.IOException declared by the coordination interface
   */
  @Override
  public boolean transitionToOpened(final HRegion r, OpenRegionDetails ord) throws IOException {
    ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;

    boolean result = false;
    HRegionInfo hri = r.getRegionInfo();
    final String name = hri.getRegionNameAsString();
    // Finally, Transition ZK node to OPENED
    try {
      if (ZKAssign.transitionNodeOpened(watcher, hri,
          zkOrd.getServerName(), zkOrd.getVersion()) == -1) {
        String warnMsg = "Completed the OPEN of region " + name +
          " but when transitioning from " + " OPENING to OPENED ";
        try {
          String node = ZKAssign.getNodeName(watcher, hri.getEncodedName());
          if (ZKUtil.checkExists(watcher, node) < 0) {
            // A negative result is treated as "the znode is gone": abort the server.
            coordination.getServer().abort(warnMsg + "the znode disappeared", null);
          } else {
            // The znode still exists, so the failure is reported as a version clash.
            LOG.warn(warnMsg + "got a version mismatch, someone else clashed; " +
              "so now unassigning -- closing region on server: " + zkOrd.getServerName());
          }
        } catch (KeeperException ke) {
          coordination.getServer().abort(warnMsg, ke);
        }
      } else {
        LOG.debug("Transitioned " + r.getRegionInfo().getEncodedName() +
          " to OPENED in zk on " + zkOrd.getServerName());
        result = true;
      }
    } catch (KeeperException e) {
      LOG.error("Failed transitioning node " + name +
        " from OPENING to OPENED -- closing region", e);
    }
    return result;
  }

  /**
   * Transition ZK node from OFFLINE to OPENING.
   *
   * <p>The version returned by the transition is stored on {@code ord}; on a
   * {@link KeeperException} the stored version is reset to {@code -1}.
   *
   * @param regionInfo region info instance
   * @param ord - instance of open region details, for ZK implementation
   *   will include version Of OfflineNode that needs to be compared
   *   before changing the node's state from OFFLINE
   * @return True if successful transition.
   */
  @Override
  public boolean transitionFromOfflineToOpening(HRegionInfo regionInfo,
                                                OpenRegionDetails ord) {
    ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;

    // encoded name is used as znode encoded name in ZK
    final String encodedName = regionInfo.getEncodedName();

    // TODO: should also handle transition from CLOSED?
    try {
      // Initialize the znode version.
      zkOrd.setVersion(ZKAssign.transitionNode(watcher, regionInfo,
        zkOrd.getServerName(), EventType.M_ZK_REGION_OFFLINE,
        EventType.RS_ZK_REGION_OPENING, zkOrd.getVersionOfOfflineNode()));
    } catch (KeeperException e) {
      LOG.error("Error transition from OFFLINE to OPENING for region=" +
        encodedName, e);
      zkOrd.setVersion(-1);
      return false;
    }
    boolean b = isGoodVersion(zkOrd);
    if (!b) {
      LOG.warn("Failed transition from OFFLINE to OPENING for region=" +
        encodedName);
    }
    return b;
  }

  /**
   * Update our OPENING state in zookeeper.
   * Do this so master doesn't timeout this region-in-transition.
   * We may lose the znode ownership during the open.  Currently it's
   * too hard interrupting ongoing region open.  Just let it complete
   * and check we still have the znode after region open.
   *
   * <p>Nothing is written to ZooKeeper when the region is no longer marked as
   * opening on this server, or when the version held in {@code ord} is already
   * {@code -1}. A {@link KeeperException} while refreshing aborts the server.
   *
   * @param ord details of the open task; must be a {@link ZkOpenRegionDetails}
   * @param regionInfo region being opened
   * @param rsServices region server services, consulted for regions in transition
   * @param context Some context to add to logs if failure
   * @return True if successful transition.
   */
  @Override
  public boolean tickleOpening(OpenRegionDetails ord, HRegionInfo regionInfo,
                               RegionServerServices rsServices, final String context) {
    ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
    if (!isRegionStillOpening(regionInfo, rsServices)) {
      LOG.warn("Open region aborted since it isn't opening any more");
      return false;
    }
    // If previous checks failed... do not try again.
    if (!isGoodVersion(zkOrd)) {
      return false;
    }
    String encodedName = regionInfo.getEncodedName();
    try {
      zkOrd.setVersion(ZKAssign.confirmNodeOpening(watcher,
        regionInfo, zkOrd.getServerName(), zkOrd.getVersion()));
    } catch (KeeperException e) {
      coordination.getServer().abort("Exception refreshing OPENING; region=" + encodedName +
        ", context=" + context, e);
      zkOrd.setVersion(-1);
      return false;
    }
    boolean b = isGoodVersion(zkOrd);
    if (!b) {
      LOG.warn("Failed refreshing OPENING; region=" + encodedName +
        ", context=" + context);
    }
    return b;
  }

  /**
   * Try to transition the znode from OFFLINE to FAILED_OPEN.
   *
   * This is not guaranteed to succeed, we just do our best.
   *
   * <p>Note that, unlike the other region-server-side operations in this class,
   * this one takes the ZooKeeper watcher and the server name from
   * {@code rsServices} rather than from the instance fields / {@code ord}.
   *
   * @param rsServices region server services supplying the ZooKeeper watcher
   *                   and server name used for the transition
   * @param hri Region we're working on.
   * @param ord Details about region open task
   * @return whether znode is successfully transitioned to FAILED_OPEN state.
   */
  @Override
  public boolean tryTransitionFromOfflineToFailedOpen(RegionServerServices rsServices,
                                                      final HRegionInfo hri,
                                                      OpenRegionDetails ord) {
    ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
    boolean result = false;
    final String name = hri.getRegionNameAsString();
    try {
      LOG.info("Opening of region " + hri + " failed, transitioning" +
        " from OFFLINE to FAILED_OPEN in ZK, expecting version " +
        zkOrd.getVersionOfOfflineNode());
      if (ZKAssign.transitionNode(
        rsServices.getZooKeeper(), hri,
        rsServices.getServerName(),
        EventType.M_ZK_REGION_OFFLINE,
        EventType.RS_ZK_REGION_FAILED_OPEN,
        zkOrd.getVersionOfOfflineNode()) == -1) {
        LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
          "It's likely that the master already timed out this open " +
          "attempt, and thus another RS already has the region.");
      } else {
        result = true;
      }
    } catch (KeeperException e) {
      LOG.error("Failed transitioning node " + name + " from OFFLINE to FAILED_OPEN", e);
    }
    return result;
  }

  /**
   * @param zkOrd open details carrying the current znode version
   * @return true unless the stored version is the {@code -1} failure marker
   */
  private boolean isGoodVersion(ZkOpenRegionDetails zkOrd) {
    return zkOrd.getVersion() != -1;
  }

  /**
   * Try to transition the znode from OPENING to FAILED_OPEN.
   *
   * This is not guaranteed to succeed, we just do our best.
   *
   * @param hri Region we're working on.
   * @param ord details of the open task; must be a {@link ZkOpenRegionDetails}
   * @return whether znode is successfully transitioned to FAILED_OPEN state.
   */
  @Override
  public boolean tryTransitionFromOpeningToFailedOpen(final HRegionInfo hri,
                                                      OpenRegionDetails ord) {
    ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
    boolean result = false;
    final String name = hri.getRegionNameAsString();
    try {
      LOG.info("Opening of region " + hri + " failed, transitioning" +
        " from OPENING to FAILED_OPEN in ZK, expecting version " + zkOrd.getVersion());
      if (ZKAssign.transitionNode(
        watcher, hri,
        zkOrd.getServerName(),
        EventType.RS_ZK_REGION_OPENING,
        EventType.RS_ZK_REGION_FAILED_OPEN,
        zkOrd.getVersion()) == -1) {
        LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
          "It's likely that the master already timed out this open " +
          "attempt, and thus another RS already has the region.");
      } else {
        result = true;
      }
    } catch (KeeperException e) {
      LOG.error("Failed transitioning node " + name +
        " from OPENING to FAILED_OPEN", e);
    }
    return result;
  }

  /**
   * Parse ZK-related fields from request.
   *
   * @param regionOpenInfo open request; its offline-node version is used when
   *                       present, otherwise {@code -1}
   * @return details populated with the offline-node version and this server's name
   */
  @Override
  public OpenRegionCoordination.OpenRegionDetails parseFromProtoRequest(
      AdminProtos.OpenRegionRequest.RegionOpenInfo regionOpenInfo) {
    ZkOpenRegionCoordination.ZkOpenRegionDetails zkCrd =
      new ZkOpenRegionCoordination.ZkOpenRegionDetails();

    int versionOfOfflineNode = -1;
    if (regionOpenInfo.hasVersionOfOfflineNode()) {
      versionOfOfflineNode = regionOpenInfo.getVersionOfOfflineNode();
    }
    zkCrd.setVersionOfOfflineNode(versionOfOfflineNode);
    zkCrd.setServerName(coordination.getServer().getServerName());

    return zkCrd;
  }

  /**
   * No ZK tracking will be performed for that case.
   * This method should be used when we want to construct OpenRegionDetails,
   * but don't want any coordination on that (when it's initiated by regionserver),
   * so no znode state transitions will be performed.
   *
   * @return details with the offline-node version set to {@code -1} and this
   *         server's name
   */
  @Override
  public OpenRegionCoordination.OpenRegionDetails getDetailsForNonCoordinatedOpening() {
    ZkOpenRegionCoordination.ZkOpenRegionDetails zkCrd =
      new ZkOpenRegionCoordination.ZkOpenRegionDetails();
    zkCrd.setVersionOfOfflineNode(-1);
    zkCrd.setServerName(coordination.getServer().getServerName());

    return zkCrd;
  }

  //--------------------------
  // HMaster-side operations
  //--------------------------

  /**
   * Finish an open on the master by deleting the region's OPENED znode.
   *
   * <p>The znode is only deleted when the region is found in transition in the
   * opened state. When the znode was not deleted (either because the region was
   * not in that state or because the delete failed) and the region's table is
   * DISABLED or DISABLING, the commit is reported as unsuccessful.
   *
   * @param assignmentManager source of region states and table states
   * @param regionInfo region whose open is being committed
   * @param ord details of the open task; must be a {@link ZkOpenRegionDetails}
   * @return false only when the OPENED znode was not deleted and the table is
   *         disabled or disabling; true otherwise
   */
  @Override
  public boolean commitOpenOnMasterSide(AssignmentManager assignmentManager,
                                        HRegionInfo regionInfo,
                                        OpenRegionDetails ord) {
    boolean committedSuccessfully = true;

    // Code to defend against case where we get SPLIT before region open
    // processing completes; temporary till we make SPLITs go via zk -- 0.92.
    RegionState regionState = assignmentManager.getRegionStates()
      .getRegionTransitionState(regionInfo.getEncodedName());
    boolean openedNodeDeleted = false;
    if (regionState != null && regionState.isOpened()) {
      openedNodeDeleted = deleteOpenedNode(regionInfo, ord);
      if (!openedNodeDeleted) {
        LOG.error("Znode of region " + regionInfo.getShortNameToLog() + " could not be deleted.");
      }
    } else {
      LOG.warn("Skipping the onlining of " + regionInfo.getShortNameToLog() +
        " because regions is NOT in RIT -- presuming this is because it SPLIT");
    }
    if (!openedNodeDeleted) {
      if (assignmentManager.getTableStateManager().isTableState(regionInfo.getTable(),
          ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING)) {
        debugLog(regionInfo, "Opened region "
          + regionInfo.getShortNameToLog() + " but "
          + "this table is disabled, triggering close of region");
        committedSuccessfully = false;
      }
    }

    return committedSuccessfully;
  }

  /**
   * Delete the region's OPENED znode, but only if its version matches the one
   * recorded in {@code ord}.
   *
   * @param regionInfo region whose znode is deleted
   * @param ord details of the open task; must be a {@link ZkOpenRegionDetails}
   * @return the result of the delete; false when the znode does not exist or when
   *         another ZooKeeper error occurred (in which case the server is aborted)
   */
  private boolean deleteOpenedNode(HRegionInfo regionInfo, OpenRegionDetails ord) {
    ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
    int expectedVersion = zkOrd.getVersion();

    debugLog(regionInfo, "Handling OPENED of " +
      regionInfo.getShortNameToLog() + " from " + zkOrd.getServerName().toString() +
      "; deleting unassigned node");
    try {
      // delete the opened znode only if the version matches.
      return ZKAssign.deleteNode(this.coordination.getServer().getZooKeeper(),
        regionInfo.getEncodedName(), EventType.RS_ZK_REGION_OPENED, expectedVersion);
    } catch (KeeperException.NoNodeException e) {
      // Getting no node exception here means that already the region has been opened.
      LOG.warn("The znode of the region " + regionInfo.getShortNameToLog() +
        " would have already been deleted");
      return false;
    } catch (KeeperException e) {
      this.coordination.getServer().abort("Error deleting OPENED node in ZK (" +
        regionInfo.getRegionNameAsString() + ")", e);
    }
    return false;
  }

  /**
   * Log a message at INFO level for meta-table regions and at DEBUG level for
   * every other region.
   *
   * @param region region the message is about; decides the log level
   * @param string message to log
   */
  private void debugLog(HRegionInfo region, String string) {
    if (region.isMetaTable()) {
      LOG.info(string);
    } else {
      LOG.debug(string);
    }
  }

  // Additional classes and helper methods

  /**
   * ZK-based implementation. Has details about whether the state transition should be
   * reflected in ZK, as well as expected version of znode.
   */
  public static class ZkOpenRegionDetails implements OpenRegionCoordination.OpenRegionDetails {

    // We get version of our znode at start of open process and monitor it across
    // the total open. We'll fail the open if someone hijacks our znode; we can
    // tell this has happened if version is not as expected.
    private volatile int version = -1;

    //version of the offline node that was set by the master
    private volatile int versionOfOfflineNode = -1;

    /**
     * Server name the handler is running on.
     */
    private ServerName serverName;

    /** Creates details with both versions left at {@code -1}. */
    public ZkOpenRegionDetails() {
    }

    /**
     * @param versionOfOfflineNode version of the offline znode to expect
     */
    public ZkOpenRegionDetails(int versionOfOfflineNode) {
      this.versionOfOfflineNode = versionOfOfflineNode;
    }

    /** @return version of the offline znode, {@code -1} if never set */
    public int getVersionOfOfflineNode() {
      return versionOfOfflineNode;
    }

    /** @param versionOfOfflineNode version of the offline znode to expect */
    public void setVersionOfOfflineNode(int versionOfOfflineNode) {
      this.versionOfOfflineNode = versionOfOfflineNode;
    }

    /** @return current znode version, {@code -1} if never set or reset after a failure */
    public int getVersion() {
      return version;
    }

    /** @param version znode version to record */
    public void setVersion(int version) {
      this.version = version;
    }

    @Override
    public ServerName getServerName() {
      return serverName;
    }

    @Override
    public void setServerName(ServerName serverName) {
      this.serverName = serverName;
    }
  }

  /**
   * Check the region server's regions-in-transition map for this region.
   *
   * @param regionInfo region to look up, by its encoded name bytes
   * @param rsServices region server services holding the regions-in-transition map
   * @return true only when the map holds {@link Boolean#TRUE} for the region
   *         (a missing entry or any other value yields false)
   */
  private boolean isRegionStillOpening(HRegionInfo regionInfo, RegionServerServices rsServices) {
    byte[] encodedName = regionInfo.getEncodedNameAsBytes();
    Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
    return Boolean.TRUE.equals(action); // true means opening for RIT
  }
}