/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 package org.apache.hadoop.hbase.master;
19 
20 import java.io.IOException;
21 import java.util.Arrays;
22 
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.apache.hadoop.hbase.classification.InterfaceAudience;
26 import org.apache.hadoop.conf.Configuration;
27 import org.apache.hadoop.hbase.Cell;
28 import org.apache.hadoop.hbase.HConstants;
29 import org.apache.hadoop.hbase.HRegionInfo;
30 import org.apache.hadoop.hbase.HRegionLocation;
31 import org.apache.hadoop.hbase.RegionLocations;
32 import org.apache.hadoop.hbase.Server;
33 import org.apache.hadoop.hbase.ServerName;
34 import org.apache.hadoop.hbase.TableName;
35 import org.apache.hadoop.hbase.MetaTableAccessor;
36 import org.apache.hadoop.hbase.client.Put;
37 import org.apache.hadoop.hbase.client.Result;
38 import org.apache.hadoop.hbase.master.RegionState.State;
39 import org.apache.hadoop.hbase.regionserver.Region;
40 import org.apache.hadoop.hbase.regionserver.RegionServerServices;
41 import org.apache.hadoop.hbase.util.Bytes;
42 import org.apache.hadoop.hbase.util.ConfigUtil;
43 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
44 import org.apache.hadoop.hbase.util.MultiHConnection;
45 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
46 import org.apache.zookeeper.KeeperException;
47 
48 import com.google.common.base.Preconditions;
49 
50 /**
51  * A helper to persist region state in meta. We may change this class
52  * to StateStore later if we also use it to store other states in meta
53  */
54 @InterfaceAudience.Private
55 public class RegionStateStore {
56   private static final Log LOG = LogFactory.getLog(RegionStateStore.class);
57 
58   /** The delimiter for meta columns for replicaIds > 0 */
59   protected static final char META_REPLICA_ID_DELIMITER = '_';
60 
61   private volatile Region metaRegion;
62   private MultiHConnection multiHConnection;
63   private volatile boolean initialized;
64 
65   private final boolean noPersistence;
66   private final Server server;
67 
68   /**
69    * Returns the {@link ServerName} from catalog table {@link Result}
70    * where the region is transitioning. It should be the same as
71    * {@link HRegionInfo#getServerName(Result)} if the server is at OPEN state.
72    * @param r Result to pull the transitioning server name from
73    * @return A ServerName instance or {@link HRegionInfo#getServerName(Result)}
74    * if necessary fields not found or empty.
75    */
getRegionServer(final Result r, int replicaId)76   static ServerName getRegionServer(final Result r, int replicaId) {
77     Cell cell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getServerNameColumn(replicaId));
78     if (cell == null || cell.getValueLength() == 0) {
79       RegionLocations locations = MetaTableAccessor.getRegionLocations(r);
80       if (locations != null) {
81         HRegionLocation location = locations.getRegionLocation(replicaId);
82         if (location != null) {
83           return location.getServerName();
84         }
85       }
86       return null;
87     }
88     return ServerName.parseServerName(Bytes.toString(cell.getValueArray(),
89       cell.getValueOffset(), cell.getValueLength()));
90   }
91 
getServerNameColumn(int replicaId)92   private static byte[] getServerNameColumn(int replicaId) {
93     return replicaId == 0
94         ? HConstants.SERVERNAME_QUALIFIER
95         : Bytes.toBytes(HConstants.SERVERNAME_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
96           + String.format(HRegionInfo.REPLICA_ID_FORMAT, replicaId));
97   }
98 
99   /**
100    * Pull the region state from a catalog table {@link Result}.
101    * @param r Result to pull the region state from
102    * @return the region state, or OPEN if there's no value written.
103    */
getRegionState(final Result r, int replicaId)104   static State getRegionState(final Result r, int replicaId) {
105     Cell cell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getStateColumn(replicaId));
106     if (cell == null || cell.getValueLength() == 0) return State.OPEN;
107     return State.valueOf(Bytes.toString(cell.getValueArray(),
108       cell.getValueOffset(), cell.getValueLength()));
109   }
110 
getStateColumn(int replicaId)111   private static byte[] getStateColumn(int replicaId) {
112     return replicaId == 0
113         ? HConstants.STATE_QUALIFIER
114         : Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
115           + String.format(HRegionInfo.REPLICA_ID_FORMAT, replicaId));
116   }
117 
118   /**
119    * Check if we should persist a state change in meta. Generally it's
120    * better to persist all state changes. However, we should not do that
121    * if the region is not in meta at all. Based on the state and the
122    * previous state, we can identify if a user region has an entry
123    * in meta. For example, merged regions are deleted from meta;
124    * New merging parents, or splitting daughters are
125    * not created in meta yet.
126    */
shouldPersistStateChange( HRegionInfo hri, RegionState state, RegionState oldState)127   private boolean shouldPersistStateChange(
128       HRegionInfo hri, RegionState state, RegionState oldState) {
129     return !hri.isMetaRegion() && !RegionStates.isOneOfStates(
130       state, State.MERGING_NEW, State.SPLITTING_NEW, State.MERGED)
131       && !(RegionStates.isOneOfStates(state, State.OFFLINE)
132         && RegionStates.isOneOfStates(oldState, State.MERGING_NEW,
133           State.SPLITTING_NEW, State.MERGED));
134   }
135 
RegionStateStore(final Server server)136   RegionStateStore(final Server server) {
137     Configuration conf = server.getConfiguration();
138     // No need to persist if using ZK but not migrating
139     noPersistence = ConfigUtil.useZKForAssignment(conf)
140       && !conf.getBoolean("hbase.assignment.usezk.migrating", false);
141     this.server = server;
142     initialized = false;
143   }
144 
start()145   void start() throws IOException {
146     if (!noPersistence) {
147       if (server instanceof RegionServerServices) {
148         metaRegion = ((RegionServerServices)server).getFromOnlineRegions(
149           HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
150       }
151       if (metaRegion == null) {
152         Configuration conf = server.getConfiguration();
153         // Config to determine the no of HConnections to META.
154         // A single HConnection should be sufficient in most cases. Only if
155         // you are doing lot of writes (>1M) to META,
156         // increasing this value might improve the write throughput.
157         multiHConnection =
158             new MultiHConnection(conf, conf.getInt("hbase.regionstatestore.meta.connection", 1));
159       }
160     }
161     initialized = true;
162   }
163 
stop()164   void stop() {
165     initialized = false;
166     if (multiHConnection != null) {
167       multiHConnection.close();
168     }
169   }
170 
updateRegionState(long openSeqNum, RegionState newState, RegionState oldState)171   void updateRegionState(long openSeqNum,
172       RegionState newState, RegionState oldState) {
173 
174     if (noPersistence) {
175       return;
176     }
177 
178     HRegionInfo hri = newState.getRegion();
179     try {
180        // Update meta before checking for initialization. Meta state stored in zk.
181       if (hri.isMetaRegion()) {
182         // persist meta state in MetaTableLocator (which in turn is zk storage currently)
183         try {
184           MetaTableLocator.setMetaLocation(server.getZooKeeper(),
185             newState.getServerName(), hri.getReplicaId(), newState.getState());
186           return; // Done
187         } catch (KeeperException e) {
188           throw new IOException("Failed to update meta ZNode", e);
189         }
190       }
191 
192     if (!initialized || !shouldPersistStateChange(hri, newState, oldState)) {
193       return;
194     }
195 
196     ServerName oldServer = oldState != null ? oldState.getServerName() : null;
197     ServerName serverName = newState.getServerName();
198     State state = newState.getState();
199 
200       int replicaId = hri.getReplicaId();
201       Put put = new Put(MetaTableAccessor.getMetaKeyForRegion(hri));
202       StringBuilder info = new StringBuilder("Updating hbase:meta row ");
203       info.append(hri.getRegionNameAsString()).append(" with state=").append(state);
204       if (serverName != null && !serverName.equals(oldServer)) {
205         put.addImmutable(HConstants.CATALOG_FAMILY, getServerNameColumn(replicaId),
206           Bytes.toBytes(serverName.getServerName()));
207         info.append(", sn=").append(serverName);
208       }
209       if (openSeqNum >= 0) {
210         Preconditions.checkArgument(state == State.OPEN
211           && serverName != null, "Open region should be on a server");
212         MetaTableAccessor.addLocation(put, serverName, openSeqNum, -1, replicaId);
213         info.append(", openSeqNum=").append(openSeqNum);
214         info.append(", server=").append(serverName);
215       }
216       put.addImmutable(HConstants.CATALOG_FAMILY, getStateColumn(replicaId),
217         Bytes.toBytes(state.name()));
218       LOG.info(info);
219 
220       // Persist the state change to meta
221       if (metaRegion != null) {
222         try {
223           // Assume meta is pinned to master.
224           // At least, that's what we want.
225           metaRegion.put(put);
226           return; // Done here
227         } catch (Throwable t) {
228           // In unit tests, meta could be moved away by intention
229           // So, the shortcut is gone. We won't try to establish the
230           // shortcut any more because we prefer meta to be pinned
231           // to the master
232           synchronized (this) {
233             if (metaRegion != null) {
234               LOG.info("Meta region shortcut failed", t);
235               if (multiHConnection == null) {
236                 multiHConnection = new MultiHConnection(server.getConfiguration(), 1);
237               }
238               metaRegion = null;
239             }
240           }
241         }
242       }
243       // Called when meta is not on master
244       multiHConnection.processBatchCallback(Arrays.asList(put), TableName.META_TABLE_NAME, null, null);
245 
246     } catch (IOException ioe) {
247       LOG.error("Failed to persist region state " + newState, ioe);
248       server.abort("Failed to update region location", ioe);
249     }
250   }
251 
splitRegion(HRegionInfo p, HRegionInfo a, HRegionInfo b, ServerName sn, int regionReplication)252   void splitRegion(HRegionInfo p,
253       HRegionInfo a, HRegionInfo b, ServerName sn, int regionReplication) throws IOException {
254     MetaTableAccessor.splitRegion(server.getConnection(), p, a, b, sn, regionReplication);
255   }
256 
mergeRegions(HRegionInfo p, HRegionInfo a, HRegionInfo b, ServerName sn, int regionReplication)257   void mergeRegions(HRegionInfo p,
258       HRegionInfo a, HRegionInfo b, ServerName sn, int regionReplication) throws IOException {
259     MetaTableAccessor.mergeRegions(server.getConnection(), p, a, b, sn, regionReplication,
260     		EnvironmentEdgeManager.currentTime());
261   }
262 }
263