1 /**
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements.  See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership.  The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License.  You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 package org.apache.hadoop.net;
19 
20 import org.apache.hadoop.classification.InterfaceAudience;
21 import org.apache.hadoop.classification.InterfaceStability;
22 
23 /**
24  * The class extends NetworkTopology to represents a cluster of computer with
25  *  a 4-layers hierarchical network topology.
26  * In this network topology, leaves represent data nodes (computers) and inner
27  * nodes represent switches/routers that manage traffic in/out of data centers,
28  * racks or physical host (with virtual switch).
29  *
30  * @see NetworkTopology
31  */
32 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
33 @InterfaceStability.Unstable
34 public class NetworkTopologyWithNodeGroup extends NetworkTopology {
35 
36   public final static String DEFAULT_NODEGROUP = "/default-nodegroup";
37 
NetworkTopologyWithNodeGroup()38   public NetworkTopologyWithNodeGroup() {
39     clusterMap = new InnerNodeWithNodeGroup(InnerNode.ROOT);
40   }
41 
42   @Override
getNodeForNetworkLocation(Node node)43   protected Node getNodeForNetworkLocation(Node node) {
44     // if node only with default rack info, here we need to add default
45     // nodegroup info
46     if (NetworkTopology.DEFAULT_RACK.equals(node.getNetworkLocation())) {
47       node.setNetworkLocation(node.getNetworkLocation()
48           + DEFAULT_NODEGROUP);
49     }
50     Node nodeGroup = getNode(node.getNetworkLocation());
51     if (nodeGroup == null) {
52       nodeGroup = new InnerNodeWithNodeGroup(node.getNetworkLocation());
53     }
54     return getNode(nodeGroup.getNetworkLocation());
55   }
56 
57   @Override
getRack(String loc)58   public String getRack(String loc) {
59     netlock.readLock().lock();
60     try {
61       loc = InnerNode.normalize(loc);
62       Node locNode = getNode(loc);
63       if (locNode instanceof InnerNodeWithNodeGroup) {
64         InnerNodeWithNodeGroup node = (InnerNodeWithNodeGroup) locNode;
65         if (node.isRack()) {
66           return loc;
67         } else if (node.isNodeGroup()) {
68           return node.getNetworkLocation();
69         } else {
70           // may be a data center
71           return null;
72         }
73       } else {
74         // not in cluster map, don't handle it
75         return loc;
76       }
77     } finally {
78       netlock.readLock().unlock();
79     }
80   }
81 
82   /**
83    * Given a string representation of a node group for a specific network
84    * location
85    *
86    * @param loc
87    *            a path-like string representation of a network location
88    * @return a node group string
89    */
getNodeGroup(String loc)90   public String getNodeGroup(String loc) {
91     netlock.readLock().lock();
92     try {
93       loc = InnerNode.normalize(loc);
94       Node locNode = getNode(loc);
95       if (locNode instanceof InnerNodeWithNodeGroup) {
96         InnerNodeWithNodeGroup node = (InnerNodeWithNodeGroup) locNode;
97         if (node.isNodeGroup()) {
98           return loc;
99         } else if (node.isRack()) {
100           // not sure the node group for a rack
101           return null;
102         } else {
103           // may be a leaf node
104           return getNodeGroup(node.getNetworkLocation());
105         }
106       } else {
107         // not in cluster map, don't handle it
108         return loc;
109       }
110     } finally {
111       netlock.readLock().unlock();
112     }
113   }
114 
115   @Override
isOnSameRack( Node node1, Node node2)116   public boolean isOnSameRack( Node node1,  Node node2) {
117     if (node1 == null || node2 == null ||
118         node1.getParent() == null || node2.getParent() == null) {
119       return false;
120     }
121 
122     netlock.readLock().lock();
123     try {
124       return isSameParents(node1.getParent(), node2.getParent());
125     } finally {
126       netlock.readLock().unlock();
127     }
128   }
129 
130   /**
131    * Check if two nodes are on the same node group (hypervisor) The
132    * assumption here is: each nodes are leaf nodes.
133    *
134    * @param node1
135    *            one node (can be null)
136    * @param node2
137    *            another node (can be null)
138    * @return true if node1 and node2 are on the same node group; false
139    *         otherwise
140    * @exception IllegalArgumentException
141    *                when either node1 or node2 is null, or node1 or node2 do
142    *                not belong to the cluster
143    */
144   @Override
isOnSameNodeGroup(Node node1, Node node2)145   public boolean isOnSameNodeGroup(Node node1, Node node2) {
146     if (node1 == null || node2 == null) {
147       return false;
148     }
149     netlock.readLock().lock();
150     try {
151       return isSameParents(node1, node2);
152     } finally {
153       netlock.readLock().unlock();
154     }
155   }
156 
157   /**
158    * Check if network topology is aware of NodeGroup
159    */
160   @Override
isNodeGroupAware()161   public boolean isNodeGroupAware() {
162     return true;
163   }
164 
165   /** Add a leaf node
166    * Update node counter & rack counter if necessary
167    * @param node node to be added; can be null
168    * @exception IllegalArgumentException if add a node to a leave
169    *                                     or node to be added is not a leaf
170    */
171   @Override
add(Node node)172   public void add(Node node) {
173     if (node==null) return;
174     if( node instanceof InnerNode ) {
175       throw new IllegalArgumentException(
176         "Not allow to add an inner node: "+NodeBase.getPath(node));
177     }
178     netlock.writeLock().lock();
179     try {
180       Node rack = null;
181 
182       // if node only with default rack info, here we need to add default
183       // nodegroup info
184       if (NetworkTopology.DEFAULT_RACK.equals(node.getNetworkLocation())) {
185         node.setNetworkLocation(node.getNetworkLocation() +
186             NetworkTopologyWithNodeGroup.DEFAULT_NODEGROUP);
187       }
188       Node nodeGroup = getNode(node.getNetworkLocation());
189       if (nodeGroup == null) {
190         nodeGroup = new InnerNodeWithNodeGroup(node.getNetworkLocation());
191       }
192       rack = getNode(nodeGroup.getNetworkLocation());
193 
194       // rack should be an innerNode and with parent.
195       // note: rack's null parent case is: node's topology only has one layer,
196       //       so rack is recognized as "/" and no parent.
197       // This will be recognized as a node with fault topology.
198       if (rack != null &&
199           (!(rack instanceof InnerNode) || rack.getParent() == null)) {
200         throw new IllegalArgumentException("Unexpected data node "
201             + node.toString()
202             + " at an illegal network location");
203       }
204       if (clusterMap.add(node)) {
205         LOG.info("Adding a new node: " + NodeBase.getPath(node));
206         if (rack == null) {
207           // We only track rack number here
208           numOfRacks++;
209         }
210       }
211       if(LOG.isDebugEnabled()) {
212         LOG.debug("NetworkTopology became:\n" + this.toString());
213       }
214     } finally {
215       netlock.writeLock().unlock();
216     }
217   }
218 
219   /** Remove a node
220    * Update node counter and rack counter if necessary
221    * @param node node to be removed; can be null
222    */
223   @Override
remove(Node node)224   public void remove(Node node) {
225     if (node==null) return;
226     if( node instanceof InnerNode ) {
227       throw new IllegalArgumentException(
228           "Not allow to remove an inner node: "+NodeBase.getPath(node));
229     }
230     LOG.info("Removing a node: "+NodeBase.getPath(node));
231     netlock.writeLock().lock();
232     try {
233       if (clusterMap.remove(node)) {
234         Node nodeGroup = getNode(node.getNetworkLocation());
235         if (nodeGroup == null) {
236           nodeGroup = new InnerNode(node.getNetworkLocation());
237         }
238         InnerNode rack = (InnerNode)getNode(nodeGroup.getNetworkLocation());
239         if (rack == null) {
240           numOfRacks--;
241         }
242       }
243       if(LOG.isDebugEnabled()) {
244         LOG.debug("NetworkTopology became:\n" + this.toString());
245       }
246     } finally {
247       netlock.writeLock().unlock();
248     }
249   }
250 
251   @Override
getWeight(Node reader, Node node)252   protected int getWeight(Node reader, Node node) {
253     // 0 is local, 1 is same node group, 2 is same rack, 3 is off rack
254     // Start off by initializing to off rack
255     int weight = 3;
256     if (reader != null) {
257       if (reader.equals(node)) {
258         weight = 0;
259       } else if (isOnSameNodeGroup(reader, node)) {
260         weight = 1;
261       } else if (isOnSameRack(reader, node)) {
262         weight = 2;
263       }
264     }
265     return weight;
266   }
267 
268   /**
269    * Sort nodes array by their distances to <i>reader</i>.
270    * <p/>
271    * This is the same as {@link NetworkTopology#sortByDistance(Node, Node[],
272    * int)} except with a four-level network topology which contains the
273    * additional network distance of a "node group" which is between local and
274    * same rack.
275    *
276    * @param reader    Node where data will be read
277    * @param nodes     Available replicas with the requested data
278    * @param activeLen Number of active nodes at the front of the array
279    */
280   @Override
sortByDistance(Node reader, Node[] nodes, int activeLen)281   public void sortByDistance(Node reader, Node[] nodes, int activeLen) {
282     // If reader is not a datanode (not in NetworkTopology tree), we need to
283     // replace this reader with a sibling leaf node in tree.
284     if (reader != null && !this.contains(reader)) {
285       Node nodeGroup = getNode(reader.getNetworkLocation());
286       if (nodeGroup != null && nodeGroup instanceof InnerNode) {
287         InnerNode parentNode = (InnerNode) nodeGroup;
288         // replace reader with the first children of its parent in tree
289         reader = parentNode.getLeaf(0, null);
290       } else {
291         return;
292       }
293     }
294     super.sortByDistance(reader, nodes, activeLen);
295   }
296 
297   /** InnerNodeWithNodeGroup represents a switch/router of a data center, rack
298    * or physical host. Different from a leaf node, it has non-null children.
299    */
300   static class InnerNodeWithNodeGroup extends InnerNode {
InnerNodeWithNodeGroup(String name, String location, InnerNode parent, int level)301     public InnerNodeWithNodeGroup(String name, String location,
302         InnerNode parent, int level) {
303       super(name, location, parent, level);
304     }
305 
InnerNodeWithNodeGroup(String name, String location)306     public InnerNodeWithNodeGroup(String name, String location) {
307       super(name, location);
308     }
309 
InnerNodeWithNodeGroup(String path)310     public InnerNodeWithNodeGroup(String path) {
311       super(path);
312     }
313 
314     @Override
isRack()315     boolean isRack() {
316       // it is node group
317       if (getChildren().isEmpty()) {
318         return false;
319       }
320 
321       Node firstChild = children.get(0);
322 
323       if (firstChild instanceof InnerNode) {
324         Node firstGrandChild = (((InnerNode) firstChild).children).get(0);
325         if (firstGrandChild instanceof InnerNode) {
326           // it is datacenter
327           return false;
328         } else {
329           return true;
330         }
331       }
332       return false;
333     }
334 
335     /**
336      * Judge if this node represents a node group
337      *
338      * @return true if it has no child or its children are not InnerNodes
339      */
isNodeGroup()340     boolean isNodeGroup() {
341       if (children.isEmpty()) {
342         return true;
343       }
344       Node firstChild = children.get(0);
345       if (firstChild instanceof InnerNode) {
346         // it is rack or datacenter
347         return false;
348       }
349       return true;
350     }
351 
352     @Override
isLeafParent()353     protected boolean isLeafParent() {
354       return isNodeGroup();
355     }
356 
357     @Override
createParentNode(String parentName)358     protected InnerNode createParentNode(String parentName) {
359       return new InnerNodeWithNodeGroup(parentName, getPath(this), this,
360           this.getLevel() + 1);
361     }
362 
363     @Override
areChildrenLeaves()364     protected boolean areChildrenLeaves() {
365       return isNodeGroup();
366     }
367   }
368 }
369