1 /** 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 package org.apache.hadoop.net; 19 20 import org.apache.hadoop.classification.InterfaceAudience; 21 import org.apache.hadoop.classification.InterfaceStability; 22 23 /** 24 * The class extends NetworkTopology to represents a cluster of computer with 25 * a 4-layers hierarchical network topology. 26 * In this network topology, leaves represent data nodes (computers) and inner 27 * nodes represent switches/routers that manage traffic in/out of data centers, 28 * racks or physical host (with virtual switch). 29 * 30 * @see NetworkTopology 31 */ 32 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 33 @InterfaceStability.Unstable 34 public class NetworkTopologyWithNodeGroup extends NetworkTopology { 35 36 public final static String DEFAULT_NODEGROUP = "/default-nodegroup"; 37 NetworkTopologyWithNodeGroup()38 public NetworkTopologyWithNodeGroup() { 39 clusterMap = new InnerNodeWithNodeGroup(InnerNode.ROOT); 40 } 41 42 @Override getNodeForNetworkLocation(Node node)43 protected Node getNodeForNetworkLocation(Node node) { 44 // if node only with default rack info, here we need to add default 45 // nodegroup info 46 if (NetworkTopology.DEFAULT_RACK.equals(node.getNetworkLocation())) { 47 node.setNetworkLocation(node.getNetworkLocation() 48 + DEFAULT_NODEGROUP); 49 } 50 Node nodeGroup = getNode(node.getNetworkLocation()); 51 if (nodeGroup == null) { 52 nodeGroup = new InnerNodeWithNodeGroup(node.getNetworkLocation()); 53 } 54 return getNode(nodeGroup.getNetworkLocation()); 55 } 56 57 @Override getRack(String loc)58 public String getRack(String loc) { 59 netlock.readLock().lock(); 60 try { 61 loc = InnerNode.normalize(loc); 62 Node locNode = getNode(loc); 63 if (locNode instanceof InnerNodeWithNodeGroup) { 64 InnerNodeWithNodeGroup node = (InnerNodeWithNodeGroup) locNode; 65 if (node.isRack()) { 66 return loc; 67 } else if (node.isNodeGroup()) { 68 return node.getNetworkLocation(); 69 } else { 70 // may be a data center 71 return null; 72 } 73 } else { 74 // not in cluster map, don't handle it 75 return loc; 76 } 77 } finally { 78 netlock.readLock().unlock(); 79 } 80 } 81 82 /** 83 * Given a string representation of a node group for a specific network 84 * location 85 * 86 * @param loc 87 * a path-like string representation of a network location 88 * @return a node group string 89 */ getNodeGroup(String loc)90 public String getNodeGroup(String loc) { 91 netlock.readLock().lock(); 92 try { 93 loc = InnerNode.normalize(loc); 94 Node locNode = getNode(loc); 95 if (locNode instanceof InnerNodeWithNodeGroup) { 96 InnerNodeWithNodeGroup node = (InnerNodeWithNodeGroup) locNode; 97 if (node.isNodeGroup()) { 98 return loc; 99 } else if (node.isRack()) { 100 // not sure the node group for a rack 101 return null; 102 } else { 103 // may be a leaf node 104 return getNodeGroup(node.getNetworkLocation()); 105 } 106 } else { 107 // not in cluster map, don't handle it 108 return loc; 109 } 110 } finally { 111 netlock.readLock().unlock(); 112 } 113 } 114 115 @Override isOnSameRack( Node node1, Node node2)116 public boolean isOnSameRack( Node node1, Node node2) { 117 if (node1 == null || node2 == null || 118 node1.getParent() == null || node2.getParent() == null) { 119 return false; 120 } 121 122 netlock.readLock().lock(); 123 try { 124 return isSameParents(node1.getParent(), node2.getParent()); 125 } finally { 126 netlock.readLock().unlock(); 127 } 128 } 129 130 /** 131 * Check if two nodes are on the same node group (hypervisor) The 132 * assumption here is: each nodes are leaf nodes. 133 * 134 * @param node1 135 * one node (can be null) 136 * @param node2 137 * another node (can be null) 138 * @return true if node1 and node2 are on the same node group; false 139 * otherwise 140 * @exception IllegalArgumentException 141 * when either node1 or node2 is null, or node1 or node2 do 142 * not belong to the cluster 143 */ 144 @Override isOnSameNodeGroup(Node node1, Node node2)145 public boolean isOnSameNodeGroup(Node node1, Node node2) { 146 if (node1 == null || node2 == null) { 147 return false; 148 } 149 netlock.readLock().lock(); 150 try { 151 return isSameParents(node1, node2); 152 } finally { 153 netlock.readLock().unlock(); 154 } 155 } 156 157 /** 158 * Check if network topology is aware of NodeGroup 159 */ 160 @Override isNodeGroupAware()161 public boolean isNodeGroupAware() { 162 return true; 163 } 164 165 /** Add a leaf node 166 * Update node counter & rack counter if necessary 167 * @param node node to be added; can be null 168 * @exception IllegalArgumentException if add a node to a leave 169 * or node to be added is not a leaf 170 */ 171 @Override add(Node node)172 public void add(Node node) { 173 if (node==null) return; 174 if( node instanceof InnerNode ) { 175 throw new IllegalArgumentException( 176 "Not allow to add an inner node: "+NodeBase.getPath(node)); 177 } 178 netlock.writeLock().lock(); 179 try { 180 Node rack = null; 181 182 // if node only with default rack info, here we need to add default 183 // nodegroup info 184 if (NetworkTopology.DEFAULT_RACK.equals(node.getNetworkLocation())) { 185 node.setNetworkLocation(node.getNetworkLocation() + 186 NetworkTopologyWithNodeGroup.DEFAULT_NODEGROUP); 187 } 188 Node nodeGroup = getNode(node.getNetworkLocation()); 189 if (nodeGroup == null) { 190 nodeGroup = new InnerNodeWithNodeGroup(node.getNetworkLocation()); 191 } 192 rack = getNode(nodeGroup.getNetworkLocation()); 193 194 // rack should be an innerNode and with parent. 195 // note: rack's null parent case is: node's topology only has one layer, 196 // so rack is recognized as "/" and no parent. 197 // This will be recognized as a node with fault topology. 198 if (rack != null && 199 (!(rack instanceof InnerNode) || rack.getParent() == null)) { 200 throw new IllegalArgumentException("Unexpected data node " 201 + node.toString() 202 + " at an illegal network location"); 203 } 204 if (clusterMap.add(node)) { 205 LOG.info("Adding a new node: " + NodeBase.getPath(node)); 206 if (rack == null) { 207 // We only track rack number here 208 numOfRacks++; 209 } 210 } 211 if(LOG.isDebugEnabled()) { 212 LOG.debug("NetworkTopology became:\n" + this.toString()); 213 } 214 } finally { 215 netlock.writeLock().unlock(); 216 } 217 } 218 219 /** Remove a node 220 * Update node counter and rack counter if necessary 221 * @param node node to be removed; can be null 222 */ 223 @Override remove(Node node)224 public void remove(Node node) { 225 if (node==null) return; 226 if( node instanceof InnerNode ) { 227 throw new IllegalArgumentException( 228 "Not allow to remove an inner node: "+NodeBase.getPath(node)); 229 } 230 LOG.info("Removing a node: "+NodeBase.getPath(node)); 231 netlock.writeLock().lock(); 232 try { 233 if (clusterMap.remove(node)) { 234 Node nodeGroup = getNode(node.getNetworkLocation()); 235 if (nodeGroup == null) { 236 nodeGroup = new InnerNode(node.getNetworkLocation()); 237 } 238 InnerNode rack = (InnerNode)getNode(nodeGroup.getNetworkLocation()); 239 if (rack == null) { 240 numOfRacks--; 241 } 242 } 243 if(LOG.isDebugEnabled()) { 244 LOG.debug("NetworkTopology became:\n" + this.toString()); 245 } 246 } finally { 247 netlock.writeLock().unlock(); 248 } 249 } 250 251 @Override getWeight(Node reader, Node node)252 protected int getWeight(Node reader, Node node) { 253 // 0 is local, 1 is same node group, 2 is same rack, 3 is off rack 254 // Start off by initializing to off rack 255 int weight = 3; 256 if (reader != null) { 257 if (reader.equals(node)) { 258 weight = 0; 259 } else if (isOnSameNodeGroup(reader, node)) { 260 weight = 1; 261 } else if (isOnSameRack(reader, node)) { 262 weight = 2; 263 } 264 } 265 return weight; 266 } 267 268 /** 269 * Sort nodes array by their distances to <i>reader</i>. 270 * <p/> 271 * This is the same as {@link NetworkTopology#sortByDistance(Node, Node[], 272 * int)} except with a four-level network topology which contains the 273 * additional network distance of a "node group" which is between local and 274 * same rack. 275 * 276 * @param reader Node where data will be read 277 * @param nodes Available replicas with the requested data 278 * @param activeLen Number of active nodes at the front of the array 279 */ 280 @Override sortByDistance(Node reader, Node[] nodes, int activeLen)281 public void sortByDistance(Node reader, Node[] nodes, int activeLen) { 282 // If reader is not a datanode (not in NetworkTopology tree), we need to 283 // replace this reader with a sibling leaf node in tree. 284 if (reader != null && !this.contains(reader)) { 285 Node nodeGroup = getNode(reader.getNetworkLocation()); 286 if (nodeGroup != null && nodeGroup instanceof InnerNode) { 287 InnerNode parentNode = (InnerNode) nodeGroup; 288 // replace reader with the first children of its parent in tree 289 reader = parentNode.getLeaf(0, null); 290 } else { 291 return; 292 } 293 } 294 super.sortByDistance(reader, nodes, activeLen); 295 } 296 297 /** InnerNodeWithNodeGroup represents a switch/router of a data center, rack 298 * or physical host. Different from a leaf node, it has non-null children. 299 */ 300 static class InnerNodeWithNodeGroup extends InnerNode { InnerNodeWithNodeGroup(String name, String location, InnerNode parent, int level)301 public InnerNodeWithNodeGroup(String name, String location, 302 InnerNode parent, int level) { 303 super(name, location, parent, level); 304 } 305 InnerNodeWithNodeGroup(String name, String location)306 public InnerNodeWithNodeGroup(String name, String location) { 307 super(name, location); 308 } 309 InnerNodeWithNodeGroup(String path)310 public InnerNodeWithNodeGroup(String path) { 311 super(path); 312 } 313 314 @Override isRack()315 boolean isRack() { 316 // it is node group 317 if (getChildren().isEmpty()) { 318 return false; 319 } 320 321 Node firstChild = children.get(0); 322 323 if (firstChild instanceof InnerNode) { 324 Node firstGrandChild = (((InnerNode) firstChild).children).get(0); 325 if (firstGrandChild instanceof InnerNode) { 326 // it is datacenter 327 return false; 328 } else { 329 return true; 330 } 331 } 332 return false; 333 } 334 335 /** 336 * Judge if this node represents a node group 337 * 338 * @return true if it has no child or its children are not InnerNodes 339 */ isNodeGroup()340 boolean isNodeGroup() { 341 if (children.isEmpty()) { 342 return true; 343 } 344 Node firstChild = children.get(0); 345 if (firstChild instanceof InnerNode) { 346 // it is rack or datacenter 347 return false; 348 } 349 return true; 350 } 351 352 @Override isLeafParent()353 protected boolean isLeafParent() { 354 return isNodeGroup(); 355 } 356 357 @Override createParentNode(String parentName)358 protected InnerNode createParentNode(String parentName) { 359 return new InnerNodeWithNodeGroup(parentName, getPath(this), this, 360 this.getLevel() + 1); 361 } 362 363 @Override areChildrenLeaves()364 protected boolean areChildrenLeaves() { 365 return isNodeGroup(); 366 } 367 } 368 } 369