1 /**
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *     http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.apache.hadoop.hbase.master;
20 
21 import org.apache.commons.logging.Log;
22 import org.apache.commons.logging.LogFactory;
23 import org.apache.hadoop.hbase.classification.InterfaceAudience;
24 import org.apache.hadoop.hbase.ServerName;
25 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
26 import org.apache.hadoop.hbase.util.Pair;
27 
28 import java.util.ArrayList;
29 import java.util.Collections;
30 import java.util.Comparator;
31 import java.util.Date;
32 import java.util.HashMap;
33 import java.util.HashSet;
34 import java.util.Iterator;
35 import java.util.List;
36 import java.util.Map;
37 import java.util.Set;
38 
39 /**
40  * Class to hold dead servers list and utility querying dead server list.
41  * On znode expiration, servers are added here.
42  */
43 @InterfaceAudience.Private
44 public class DeadServer {
45   private static final Log LOG = LogFactory.getLog(DeadServer.class);
46 
47   /**
48    * Set of known dead servers.  On znode expiration, servers are added here.
49    * This is needed in case of a network partitioning where the server's lease
50    * expires, but the server is still running. After the network is healed,
51    * and it's server logs are recovered, it will be told to call server startup
52    * because by then, its regions have probably been reassigned.
53    */
54   private final Map<ServerName, Long> deadServers = new HashMap<ServerName, Long>();
55 
56   /**
57    * Number of dead servers currently being processed
58    */
59   private int numProcessing = 0;
60 
61   /**
62    * Whether a dead server is being processed currently.
63    */
64   private boolean processing = false;
65 
66   /**
67    * A dead server that comes back alive has a different start code. The new start code should be
68    *  greater than the old one, but we don't take this into account in this method.
69    *
70    * @param newServerName Servername as either <code>host:port</code> or
71    *                      <code>host,port,startcode</code>.
72    * @return true if this server was dead before and coming back alive again
73    */
cleanPreviousInstance(final ServerName newServerName)74   public synchronized boolean cleanPreviousInstance(final ServerName newServerName) {
75     Iterator<ServerName> it = deadServers.keySet().iterator();
76     while (it.hasNext()) {
77       ServerName sn = it.next();
78       if (ServerName.isSameHostnameAndPort(sn, newServerName)) {
79         it.remove();
80         return true;
81       }
82     }
83 
84     return false;
85   }
86 
87   /**
88    * @param serverName server name.
89    * @return true if this server is on the dead servers list false otherwise
90    */
isDeadServer(final ServerName serverName)91   public synchronized boolean isDeadServer(final ServerName serverName) {
92     return deadServers.containsKey(serverName);
93   }
94 
95   /**
96    * Checks if there are currently any dead servers being processed by the
97    * master.  Returns true if at least one region server is currently being
98    * processed as dead.
99    *
100    * @return true if any RS are being processed as dead
101    */
areDeadServersInProgress()102   public synchronized boolean areDeadServersInProgress() { return processing; }
103 
copyServerNames()104   public synchronized Set<ServerName> copyServerNames() {
105     Set<ServerName> clone = new HashSet<ServerName>(deadServers.size());
106     clone.addAll(deadServers.keySet());
107     return clone;
108   }
109 
110   /**
111    * Adds the server to the dead server list if it's not there already.
112    * @param sn the server name
113    */
add(ServerName sn)114   public synchronized void add(ServerName sn) {
115     processing = true;
116     if (!deadServers.containsKey(sn)){
117       deadServers.put(sn, EnvironmentEdgeManager.currentTime());
118     }
119   }
120 
121   /**
122    * Notify that we started processing this dead server.
123    * @param sn ServerName for the dead server.
124    */
notifyServer(ServerName sn)125   public synchronized void notifyServer(ServerName sn) {
126     if (LOG.isDebugEnabled()) { LOG.debug("Started processing " + sn); }
127     processing = true;
128     numProcessing++;
129   }
130 
finish(ServerName sn)131   public synchronized void finish(ServerName sn) {
132     numProcessing--;
133     if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + numProcessing);
134 
135     assert numProcessing >= 0: "Number of dead servers in processing should always be non-negative";
136 
137     if (numProcessing < 0) {
138       LOG.error("Number of dead servers in processing = " + numProcessing
139           + ". Something went wrong, this should always be non-negative.");
140       numProcessing = 0;
141     }
142     if (numProcessing == 0) { processing = false; }
143   }
144 
size()145   public synchronized int size() {
146     return deadServers.size();
147   }
148 
isEmpty()149   public synchronized boolean isEmpty() {
150     return deadServers.isEmpty();
151   }
152 
cleanAllPreviousInstances(final ServerName newServerName)153   public synchronized void cleanAllPreviousInstances(final ServerName newServerName) {
154     Iterator<ServerName> it = deadServers.keySet().iterator();
155     while (it.hasNext()) {
156       ServerName sn = it.next();
157       if (ServerName.isSameHostnameAndPort(sn, newServerName)) {
158         it.remove();
159       }
160     }
161   }
162 
toString()163   public synchronized String toString() {
164     StringBuilder sb = new StringBuilder();
165     for (ServerName sn : deadServers.keySet()) {
166       if (sb.length() > 0) {
167         sb.append(", ");
168       }
169       sb.append(sn.toString());
170     }
171     return sb.toString();
172   }
173 
174   /**
175    * Extract all the servers dead since a given time, and sort them.
176    * @param ts the time, 0 for all
177    * @return a sorted array list, by death time, lowest values first.
178    */
copyDeadServersSince(long ts)179   public synchronized List<Pair<ServerName, Long>> copyDeadServersSince(long ts){
180     List<Pair<ServerName, Long>> res =  new ArrayList<Pair<ServerName, Long>>(size());
181 
182     for (Map.Entry<ServerName, Long> entry:deadServers.entrySet()){
183       if (entry.getValue() >= ts){
184         res.add(new Pair<ServerName, Long>(entry.getKey(), entry.getValue()));
185       }
186     }
187 
188     Collections.sort(res, ServerNameDeathDateComparator);
189     return res;
190   }
191 
192   /**
193    * Get the time when a server died
194    * @param deadServerName the dead server name
195    * @return the date when the server died
196    */
getTimeOfDeath(final ServerName deadServerName)197   public synchronized Date getTimeOfDeath(final ServerName deadServerName){
198     Long time = deadServers.get(deadServerName);
199     return time == null ? null : new Date(time);
200   }
201 
202   private static Comparator<Pair<ServerName, Long>> ServerNameDeathDateComparator =
203       new Comparator<Pair<ServerName, Long>>(){
204 
205     @Override
206     public int compare(Pair<ServerName, Long> o1, Pair<ServerName, Long> o2) {
207       return o1.getSecond().compareTo(o2.getSecond());
208     }
209   };
210 }
211