1 /** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 package org.apache.hadoop.hbase.master; 20 21 import org.apache.commons.logging.Log; 22 import org.apache.commons.logging.LogFactory; 23 import org.apache.hadoop.hbase.classification.InterfaceAudience; 24 import org.apache.hadoop.hbase.ServerName; 25 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 26 import org.apache.hadoop.hbase.util.Pair; 27 28 import java.util.ArrayList; 29 import java.util.Collections; 30 import java.util.Comparator; 31 import java.util.Date; 32 import java.util.HashMap; 33 import java.util.HashSet; 34 import java.util.Iterator; 35 import java.util.List; 36 import java.util.Map; 37 import java.util.Set; 38 39 /** 40 * Class to hold dead servers list and utility querying dead server list. 41 * On znode expiration, servers are added here. 42 */ 43 @InterfaceAudience.Private 44 public class DeadServer { 45 private static final Log LOG = LogFactory.getLog(DeadServer.class); 46 47 /** 48 * Set of known dead servers. On znode expiration, servers are added here. 49 * This is needed in case of a network partitioning where the server's lease 50 * expires, but the server is still running. After the network is healed, 51 * and it's server logs are recovered, it will be told to call server startup 52 * because by then, its regions have probably been reassigned. 53 */ 54 private final Map<ServerName, Long> deadServers = new HashMap<ServerName, Long>(); 55 56 /** 57 * Number of dead servers currently being processed 58 */ 59 private int numProcessing = 0; 60 61 /** 62 * Whether a dead server is being processed currently. 63 */ 64 private boolean processing = false; 65 66 /** 67 * A dead server that comes back alive has a different start code. The new start code should be 68 * greater than the old one, but we don't take this into account in this method. 69 * 70 * @param newServerName Servername as either <code>host:port</code> or 71 * <code>host,port,startcode</code>. 72 * @return true if this server was dead before and coming back alive again 73 */ cleanPreviousInstance(final ServerName newServerName)74 public synchronized boolean cleanPreviousInstance(final ServerName newServerName) { 75 Iterator<ServerName> it = deadServers.keySet().iterator(); 76 while (it.hasNext()) { 77 ServerName sn = it.next(); 78 if (ServerName.isSameHostnameAndPort(sn, newServerName)) { 79 it.remove(); 80 return true; 81 } 82 } 83 84 return false; 85 } 86 87 /** 88 * @param serverName server name. 89 * @return true if this server is on the dead servers list false otherwise 90 */ isDeadServer(final ServerName serverName)91 public synchronized boolean isDeadServer(final ServerName serverName) { 92 return deadServers.containsKey(serverName); 93 } 94 95 /** 96 * Checks if there are currently any dead servers being processed by the 97 * master. Returns true if at least one region server is currently being 98 * processed as dead. 99 * 100 * @return true if any RS are being processed as dead 101 */ areDeadServersInProgress()102 public synchronized boolean areDeadServersInProgress() { return processing; } 103 copyServerNames()104 public synchronized Set<ServerName> copyServerNames() { 105 Set<ServerName> clone = new HashSet<ServerName>(deadServers.size()); 106 clone.addAll(deadServers.keySet()); 107 return clone; 108 } 109 110 /** 111 * Adds the server to the dead server list if it's not there already. 112 * @param sn the server name 113 */ add(ServerName sn)114 public synchronized void add(ServerName sn) { 115 processing = true; 116 if (!deadServers.containsKey(sn)){ 117 deadServers.put(sn, EnvironmentEdgeManager.currentTime()); 118 } 119 } 120 121 /** 122 * Notify that we started processing this dead server. 123 * @param sn ServerName for the dead server. 124 */ notifyServer(ServerName sn)125 public synchronized void notifyServer(ServerName sn) { 126 if (LOG.isDebugEnabled()) { LOG.debug("Started processing " + sn); } 127 processing = true; 128 numProcessing++; 129 } 130 finish(ServerName sn)131 public synchronized void finish(ServerName sn) { 132 numProcessing--; 133 if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + numProcessing); 134 135 assert numProcessing >= 0: "Number of dead servers in processing should always be non-negative"; 136 137 if (numProcessing < 0) { 138 LOG.error("Number of dead servers in processing = " + numProcessing 139 + ". Something went wrong, this should always be non-negative."); 140 numProcessing = 0; 141 } 142 if (numProcessing == 0) { processing = false; } 143 } 144 size()145 public synchronized int size() { 146 return deadServers.size(); 147 } 148 isEmpty()149 public synchronized boolean isEmpty() { 150 return deadServers.isEmpty(); 151 } 152 cleanAllPreviousInstances(final ServerName newServerName)153 public synchronized void cleanAllPreviousInstances(final ServerName newServerName) { 154 Iterator<ServerName> it = deadServers.keySet().iterator(); 155 while (it.hasNext()) { 156 ServerName sn = it.next(); 157 if (ServerName.isSameHostnameAndPort(sn, newServerName)) { 158 it.remove(); 159 } 160 } 161 } 162 toString()163 public synchronized String toString() { 164 StringBuilder sb = new StringBuilder(); 165 for (ServerName sn : deadServers.keySet()) { 166 if (sb.length() > 0) { 167 sb.append(", "); 168 } 169 sb.append(sn.toString()); 170 } 171 return sb.toString(); 172 } 173 174 /** 175 * Extract all the servers dead since a given time, and sort them. 176 * @param ts the time, 0 for all 177 * @return a sorted array list, by death time, lowest values first. 178 */ copyDeadServersSince(long ts)179 public synchronized List<Pair<ServerName, Long>> copyDeadServersSince(long ts){ 180 List<Pair<ServerName, Long>> res = new ArrayList<Pair<ServerName, Long>>(size()); 181 182 for (Map.Entry<ServerName, Long> entry:deadServers.entrySet()){ 183 if (entry.getValue() >= ts){ 184 res.add(new Pair<ServerName, Long>(entry.getKey(), entry.getValue())); 185 } 186 } 187 188 Collections.sort(res, ServerNameDeathDateComparator); 189 return res; 190 } 191 192 /** 193 * Get the time when a server died 194 * @param deadServerName the dead server name 195 * @return the date when the server died 196 */ getTimeOfDeath(final ServerName deadServerName)197 public synchronized Date getTimeOfDeath(final ServerName deadServerName){ 198 Long time = deadServers.get(deadServerName); 199 return time == null ? null : new Date(time); 200 } 201 202 private static Comparator<Pair<ServerName, Long>> ServerNameDeathDateComparator = 203 new Comparator<Pair<ServerName, Long>>(){ 204 205 @Override 206 public int compare(Pair<ServerName, Long> o1, Pair<ServerName, Long> o2) { 207 return o1.getSecond().compareTo(o2.getSecond()); 208 } 209 }; 210 } 211