/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.tools;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.security.PrivilegedExceptionAction;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck;
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * This class provides rudimentary checking of DFS volumes for errors and
 * sub-optimal conditions.
 * <p>The tool scans all files and directories, starting from an indicated
 *  root path. The following abnormal conditions are detected and handled:</p>
 * <ul>
 * <li>files with blocks that are completely missing from all datanodes.<br/>
 * In this case the tool can perform one of the following actions:
 *  <ul>
 *      <li>none ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_NONE})</li>
 *      <li>move corrupted files to /lost+found directory on DFS
 *      ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_MOVE}). Remaining data blocks are saved as
 *      block chains, representing the longest consecutive series of valid blocks.</li>
 *      <li>delete corrupted files ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_DELETE})</li>
 *  </ul>
 *  </li>
 *  <li>files with under-replicated or over-replicated blocks</li>
 *  </ul>
 *  Additionally, the tool collects detailed overall DFS statistics, and can
 *  optionally print statistics on the block locations and replication factors
 *  of each file.
 *  The tool also provides an option to filter open files during the scan.
 *
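 *  <p>Example invocations (illustrative; the complete option list is in the
 *  usage string below):</p>
 *  <pre>
 *    hdfs fsck /
 *    hdfs fsck /user/data -files -blocks -locations
 *    hdfs fsck / -list-corruptfileblocks
 *  </pre>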
 */
@InterfaceAudience.Private
public class DFSck extends Configured implements Tool {
  static {
    HdfsConfiguration.init();
  }

  private static final String USAGE = "Usage: hdfs fsck <path> "
      + "[-list-corruptfileblocks | "
      + "[-move | -delete | -openforwrite] "
      + "[-files [-blocks [-locations | -racks]]]] "
      + "[-includeSnapshots] "
      + "[-storagepolicies] [-blockId <blk_Id>]\n"
      + "\t<path>\tstart checking from this path\n"
      + "\t-move\tmove corrupted files to /lost+found\n"
      + "\t-delete\tdelete corrupted files\n"
      + "\t-files\tprint out files being checked\n"
      + "\t-openforwrite\tprint out files opened for write\n"
      + "\t-includeSnapshots\tinclude snapshot data if the given path"
      + " indicates a snapshottable directory or there are "
      + "snapshottable directories under it\n"
      + "\t-list-corruptfileblocks\tprint out list of missing "
      + "blocks and files they belong to\n"
      + "\t-blocks\tprint out block report\n"
      + "\t-locations\tprint out locations for every block\n"
      + "\t-racks\tprint out network topology for data-node locations\n"
      + "\t-storagepolicies\tprint out storage policy summary for the blocks\n"
      + "\t-blockId\tprint out which file this blockId belongs to, locations"
      + " (nodes, racks) of this block, and other diagnostics info"
      + " (under replicated, corrupted or not, etc)\n\n"
      + "Please Note:\n"
      + "\t1. By default fsck ignores files opened for write; "
      + "use -openforwrite to report such files. They are usually "
      + "tagged CORRUPT or HEALTHY depending on their block "
      + "allocation status\n"
      + "\t2. Option -includeSnapshots should not be used for comparing stats;"
      + " it should be used only for a HEALTH check, as the stats may contain"
      + " duplicates if the same file is present in both the original fs tree"
      + " and inside snapshots.";
  private final UserGroupInformation ugi;
  private final PrintStream out;
  private final URLConnectionFactory connectionFactory;
  private final boolean isSpnegoEnabled;

  /**
   * Filesystem checker.
   * @param conf current Configuration
   */
  public DFSck(Configuration conf) throws IOException {
    this(conf, System.out);
  }

  /**
   * Filesystem checker.
   * @param conf current Configuration
   * @param out stream to print the check results to
   */
  public DFSck(Configuration conf, PrintStream out) throws IOException {
    super(conf);
    this.ugi = UserGroupInformation.getCurrentUser();
    this.out = out;
    this.connectionFactory = URLConnectionFactory
        .newDefaultURLConnectionFactory(conf);
    this.isSpnegoEnabled = UserGroupInformation.isSecurityEnabled();
  }

  /**
   * Print fsck usage information.
   */
  static void printUsage(PrintStream out) {
    out.println(USAGE + "\n");
    ToolRunner.printGenericCommandUsage(out);
  }

  @Override
  public int run(final String[] args) throws IOException {
    if (args.length == 0) {
      printUsage(System.err);
      return -1;
    }

    try {
      // Execute the check in the security context of the current user.
      return UserGroupInformation.getCurrentUser().doAs(
          new PrivilegedExceptionAction<Integer>() {
            @Override
            public Integer run() throws Exception {
              return doWork(args);
            }
          });
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
  }

  /*
   * To get the complete list, we need to call the namenode iteratively until
   * the server reports that there are no more entries left.
   */
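  // Each response from the namenode is a sequence of corrupt-file lines,
  // optionally including a "Cookie:\t<value>" line whose value is sent back
  // as "&startblockafter=<value>" on the next request. Iteration stops once
  // the server reports that no (more) CORRUPT files exist, or that the path
  // does not exist.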
  private Integer listCorruptFileBlocks(String dir, String baseUrl)
      throws IOException {
    int errCode = -1;
    int numCorrupt = 0;
    int cookie = 0;
    final String noCorruptLine = "has no CORRUPT files";
    final String noMoreCorruptLine = "has no more CORRUPT files";
    final String cookiePrefix = "Cookie:";
    boolean allDone = false;
    while (!allDone) {
      final StringBuilder url = new StringBuilder(baseUrl);
      if (cookie > 0) {
        url.append("&startblockafter=").append(String.valueOf(cookie));
      }
      URL path = new URL(url.toString());
      URLConnection connection;
      try {
        connection = connectionFactory.openConnection(path, isSpnegoEnabled);
      } catch (AuthenticationException e) {
        throw new IOException(e);
      }
      InputStream stream = connection.getInputStream();
      BufferedReader input = new BufferedReader(new InputStreamReader(
          stream, "UTF-8"));
      try {
        String line = null;
        while ((line = input.readLine()) != null) {
          if (line.startsWith(cookiePrefix)) {
            try {
              cookie = Integer.parseInt(line.split("\t")[1]);
            } catch (Exception e) {
              allDone = true;
              break;
            }
            continue;
          }
          if ((line.endsWith(noCorruptLine)) ||
              (line.endsWith(noMoreCorruptLine)) ||
              (line.endsWith(NamenodeFsck.NONEXISTENT_STATUS))) {
            allDone = true;
            break;
          }
          if ((line.isEmpty())
              || (line.startsWith("FSCK started by"))
              || (line.startsWith("The filesystem under path"))) {
            continue;
          }
          numCorrupt++;
          if (numCorrupt == 1) {
            out.println("The list of corrupt files under path '"
                + dir + "' are:");
          }
          out.println(line);
        }
      } finally {
        input.close();
      }
    }
    out.println("The filesystem under path '" + dir + "' has "
        + numCorrupt + " CORRUPT files");
    if (numCorrupt == 0) {
      errCode = 0;
    }
    return errCode;
  }

  private Path getResolvedPath(String dir) throws IOException {
    Configuration conf = getConf();
    Path dirPath = new Path(dir);
    FileSystem fs = dirPath.getFileSystem(conf);
    return fs.resolvePath(dirPath);
  }

  /**
   * Derive the namenode http address from the current file system,
   * either default or as set by "-fs" in the generic options.
   * @param target a path on the target file system
   * @return the namenode HTTP address, or null on failure
   * @throws IOException if we can't determine the active NN address
   */
  private URI getCurrentNamenodeAddress(Path target) throws IOException {
    Configuration conf = getConf();

    // Get the filesystem object to verify it is an HDFS system.
    final FileSystem fs = target.getFileSystem(conf);
    if (!(fs instanceof DistributedFileSystem)) {
      System.err.println("FileSystem is " + fs.getUri() + ", not HDFS");
      return null;
    }

    return DFSUtil.getInfoServer(HAUtil.getAddressOfActive(fs), conf,
        DFSUtil.getHttpClientScheme(conf));
  }

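  /**
   * Build the fsck servlet URL from the parsed command-line options, open a
   * connection to the active namenode's HTTP server, and stream the response
   * to the configured output. The return value is derived from the final
   * status line of the namenode's response.
   */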
  private int doWork(final String[] args) throws IOException {
    final StringBuilder url = new StringBuilder();

    url.append("/fsck?ugi=").append(ugi.getShortUserName());
    String dir = null;
    boolean doListCorruptFileBlocks = false;
    for (int idx = 0; idx < args.length; idx++) {
      if (args[idx].equals("-move")) { url.append("&move=1"); }
      else if (args[idx].equals("-delete")) { url.append("&delete=1"); }
      else if (args[idx].equals("-files")) { url.append("&files=1"); }
      else if (args[idx].equals("-openforwrite")) { url.append("&openforwrite=1"); }
      else if (args[idx].equals("-blocks")) { url.append("&blocks=1"); }
      else if (args[idx].equals("-locations")) { url.append("&locations=1"); }
      else if (args[idx].equals("-racks")) { url.append("&racks=1"); }
      else if (args[idx].equals("-storagepolicies")) { url.append("&storagepolicies=1"); }
      else if (args[idx].equals("-list-corruptfileblocks")) {
        url.append("&listcorruptfileblocks=1");
        doListCorruptFileBlocks = true;
      } else if (args[idx].equals("-includeSnapshots")) {
        url.append("&includeSnapshots=1");
      } else if (args[idx].equals("-blockId")) {
        // Consume the following arguments up to the next option as a
        // space-separated list of block IDs.
        StringBuilder sb = new StringBuilder();
        idx++;
        while (idx < args.length && !args[idx].startsWith("-")) {
          sb.append(args[idx]);
          sb.append(" ");
          idx++;
        }
        url.append("&blockId=").append(URLEncoder.encode(sb.toString(), "UTF-8"));
      } else if (!args[idx].startsWith("-")) {
        if (null == dir) {
          dir = args[idx];
        } else {
          System.err.println("fsck: can only operate on one path at a time '"
              + args[idx] + "'");
          printUsage(System.err);
          return -1;
        }
      } else {
        System.err.println("fsck: Illegal option '" + args[idx] + "'");
        printUsage(System.err);
        return -1;
      }
    }
    if (null == dir) {
      dir = "/";
    }

    Path dirpath = null;
    URI namenodeAddress = null;
    try {
      dirpath = getResolvedPath(dir);
      namenodeAddress = getCurrentNamenodeAddress(dirpath);
    } catch (IOException ioe) {
      System.err.println("FileSystem is inaccessible due to:\n"
          + StringUtils.stringifyException(ioe));
    }

    if (namenodeAddress == null) {
      // Error message already printed by getCurrentNamenodeAddress().
      System.err.println("DFSck exiting.");
      return 0;
    }

    url.insert(0, namenodeAddress.toString());
    url.append("&path=").append(URLEncoder.encode(
        Path.getPathWithoutSchemeAndAuthority(dirpath).toString(), "UTF-8"));
    System.err.println("Connecting to namenode via " + url.toString());

    if (doListCorruptFileBlocks) {
      return listCorruptFileBlocks(dir, url.toString());
    }
    URL path = new URL(url.toString());
    URLConnection connection;
    try {
      connection = connectionFactory.openConnection(path, isSpnegoEnabled);
    } catch (AuthenticationException e) {
      throw new IOException(e);
    }
    InputStream stream = connection.getInputStream();
    BufferedReader input = new BufferedReader(new InputStreamReader(
                                              stream, "UTF-8"));
    String line = null;
    String lastLine = null;
    int errCode = -1;
    try {
      while ((line = input.readLine()) != null) {
        out.println(line);
        lastLine = line;
      }
    } finally {
      input.close();
    }
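    // Derive the exit code from the final status line: 0 = healthy (a
    // nonexistent path or a malformed blockId also yields 0), 1 = corrupt,
    // 2 = decommissioned, 3 = decommissioning.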
    if (lastLine == null) {
      // Empty response from the namenode; report failure.
      return errCode;
    }
    if (lastLine.endsWith(NamenodeFsck.HEALTHY_STATUS)) {
      errCode = 0;
    } else if (lastLine.endsWith(NamenodeFsck.CORRUPT_STATUS)) {
      errCode = 1;
    } else if (lastLine.endsWith(NamenodeFsck.NONEXISTENT_STATUS)) {
      errCode = 0;
    } else if (lastLine.contains("Incorrect blockId format:")) {
      errCode = 0;
    } else if (lastLine.endsWith(NamenodeFsck.DECOMMISSIONED_STATUS)) {
      errCode = 2;
    } else if (lastLine.endsWith(NamenodeFsck.DECOMMISSIONING_STATUS)) {
      errCode = 3;
    }
    return errCode;
  }

  public static void main(String[] args) throws Exception {
    // -files option is also used by GenericOptionsParser
    // Make sure that is not the first argument for fsck
    int res = -1;
    if ((args.length == 0) || ("-files".equals(args[0]))) {
      // printUsage already prints the generic command usage.
      printUsage(System.err);
    } else if (DFSUtil.parseHelpArgument(args, USAGE, System.out, true)) {
      res = 0;
    } else {
      res = ToolRunner.run(new DFSck(new HdfsConfiguration()), args);
    }
    System.exit(res);
  }
}