/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.util.StringUtils;

import com.google.common.base.Charsets;

/**
 * Static functions for dealing with files of the same format
 * that the Unix "md5sum" utility writes.
 */
public abstract class MD5FileUtils {
  private static final Log LOG = LogFactory.getLog(
      MD5FileUtils.class);

  /** Suffix appended to a data file's name to form its checksum file name. */
  public static final String MD5_SUFFIX = ".md5";

  /**
   * Expected shape of the first line of an md5 file: 32 lowercase hex
   * digits, a space, then either a space or '*', then the file name.
   * group(1) is the digest and group(2) is the file name.
   */
  private static final Pattern LINE_REGEX =
      Pattern.compile("([0-9a-f]{32}) [ \\*](.+)");

  /**
   * Verify that the previously saved md5 for the given file matches
   * expectedMD5.
   *
   * @param dataFile the data file whose stored checksum is checked
   * @param expectedMD5 the digest the stored checksum must equal
   * @throws IOException if the stored checksum cannot be read or does not
   *         equal expectedMD5
   */
  public static void verifySavedMD5(File dataFile, MD5Hash expectedMD5)
      throws IOException {
    MD5Hash storedHash = readStoredMd5ForFile(dataFile);
    // Check the hash itself
    if (!expectedMD5.equals(storedHash)) {
      throw new IOException(
          "File " + dataFile + " did not match stored MD5 checksum " +
          " (stored: " + storedHash + ", computed: " + expectedMD5 + ")");
    }
  }

  /**
   * Read the first line of the given md5 file and match it against the
   * expected md5sum line format.
   *
   * @param md5File the md5 file to read
   * @return a matcher with two matched groups
   *   where group(1) is the md5 string and group(2) is the data file path.
   * @throws IOException if the file cannot be read, or if its first line
   *         (trimmed; an empty file is treated as an empty line) does not
   *         match the expected format
   */
  private static Matcher readStoredMd5(File md5File) throws IOException {
    BufferedReader reader =
        new BufferedReader(new InputStreamReader(new FileInputStream(
            md5File), Charsets.UTF_8));
    String md5Line;
    try {
      md5Line = reader.readLine();
      // An empty file yields null; normalize so it is reported as a
      // pattern mismatch below instead of failing on trim().
      if (md5Line == null) { md5Line = ""; }
      md5Line = md5Line.trim();
    } catch (IOException ioe) {
      throw new IOException("Error reading md5 file at " + md5File, ioe);
    } finally {
      IOUtils.cleanup(LOG, reader);
    }

    Matcher matcher = LINE_REGEX.matcher(md5Line);
    if (!matcher.matches()) {
      throw new IOException("Invalid MD5 file " + md5File + ": the content \""
          + md5Line + "\" does not match the expected pattern.");
    }
    return matcher;
  }

  /**
   * Read the md5 checksum stored alongside the given data file.
   *
   * @param dataFile the file containing data
   * @return the checksum stored in dataFile.md5, or null if that md5 file
   *         does not exist
   * @throws IOException if the md5 file cannot be read, is malformed, or
   *         references a file whose name differs from dataFile's name
   */
  public static MD5Hash readStoredMd5ForFile(File dataFile) throws IOException {
    final File md5File = getDigestFileForFile(dataFile);
    if (!md5File.exists()) {
      return null;
    }

    final Matcher matcher = readStoredMd5(md5File);
    String storedHash = matcher.group(1);
    File referencedFile = new File(matcher.group(2));

    // Sanity check: Make sure that the file referenced in the .md5 file at
    // least has the same name as the file we expect
    if (!referencedFile.getName().equals(dataFile.getName())) {
      throw new IOException(
          "MD5 file at " + md5File + " references file named " +
          referencedFile.getName() + " but we expected it to reference " +
          dataFile);
    }
    return new MD5Hash(storedHash);
  }

  /**
   * Read dataFile and compute its MD5 checksum.
   *
   * @param dataFile the file to digest
   * @return the MD5 digest of the file's contents
   * @throws IOException if the file cannot be opened or read
   */
  public static MD5Hash computeMd5ForFile(File dataFile) throws IOException {
    InputStream in = new FileInputStream(dataFile);
    try {
      MessageDigest digester = MD5Hash.getDigester();
      DigestInputStream dis = new DigestInputStream(in, digester);
      // Drain the stream into a null sink; the digest is updated as a side
      // effect of reading through the DigestInputStream.
      IOUtils.copyBytes(dis, new IOUtils.NullOutputStream(), 128*1024);

      return new MD5Hash(digester.digest());
    } finally {
      IOUtils.closeStream(in);
    }
  }

  /**
   * Save the ".md5" file that lists the md5sum of another file.
   *
   * @param dataFile the original file whose md5 was computed
   * @param digest the computed digest
   * @throws IOException if the md5 file cannot be written
   */
  public static void saveMD5File(File dataFile, MD5Hash digest)
      throws IOException {
    final String digestString = StringUtils.byteToHexString(digest.getDigest());
    saveMD5File(dataFile, digestString);
  }

  /**
   * Write the md5 file for dataFile as a single line of the form
   * "&lt;digest&gt; *&lt;data file name&gt;\n".
   *
   * @param dataFile the data file the checksum belongs to
   * @param digestString the hex-encoded digest to store
   * @throws IOException if the md5 file cannot be written
   */
  private static void saveMD5File(File dataFile, String digestString)
      throws IOException {
    File md5File = getDigestFileForFile(dataFile);
    String md5Line = digestString + " *" + dataFile.getName() + "\n";

    AtomicFileOutputStream afos = new AtomicFileOutputStream(md5File);
    boolean written = false;
    try {
      afos.write(md5Line.getBytes(Charsets.UTF_8));
      written = true;
    } finally {
      if (!written) {
        // Do not leak the stream when the write fails.
        // NOTE(review): abort() is expected to close the stream and discard
        // the temporary file instead of committing it -- confirm against
        // AtomicFileOutputStream.
        afos.abort();
      }
    }
    afos.close();

    if (LOG.isDebugEnabled()) {
      LOG.debug("Saved MD5 " + digestString + " to " + md5File);
    }
  }

  /**
   * Move the stored checksum of oldDataFile so that it belongs to
   * newDataFile: the digest is read from the old md5 file, a new md5 file
   * naming newDataFile is saved, and the old md5 file is then deleted.
   * A failure to delete the old md5 file is only logged as a warning.
   *
   * @param oldDataFile the data file whose md5 file currently exists
   * @param newDataFile the data file the checksum should now refer to
   * @throws FileNotFoundException if the md5 file for oldDataFile does not
   *         exist
   * @throws IOException if the old md5 file cannot be read or is malformed,
   *         or the new md5 file cannot be written
   */
  public static void renameMD5File(File oldDataFile, File newDataFile)
      throws IOException {
    final File fromFile = getDigestFileForFile(oldDataFile);
    if (!fromFile.exists()) {
      throw new FileNotFoundException(fromFile + " does not exist.");
    }

    final String digestString = readStoredMd5(fromFile).group(1);
    saveMD5File(newDataFile, digestString);

    if (!fromFile.delete()) {
      LOG.warn("deleting  " + fromFile.getAbsolutePath() + " FAILED");
    }
  }

  /**
   * @param file the data file
   * @return a reference to the file with .md5 suffix that will
   * contain the md5 checksum for the given data file.
   */
  public static File getDigestFileForFile(File file) {
    return new File(file.getParentFile(), file.getName() + MD5_SUFFIX);
  }
}