/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import junit.framework.TestCase;
import java.io.*;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

/**
 * This class tests various combinations of dfs.name.dir
 * and dfs.name.edits.dir configurations.
 */
public class TestNameEditsConfigs extends TestCase {
  static final long SEED = 0xDEADBEEFL;
  static final int BLOCK_SIZE = 4096;
  static final int FILE_SIZE = 8192;
  static final int NUM_DATA_NODES = 3;
  /** Path of the image file, relative to a name/checkpoint directory. */
  static final String FILE_IMAGE = "current/fsimage";
  /** Path of the edits file, relative to an edits/checkpoint directory. */
  static final String FILE_EDITS = "current/edits";

  /** Replication used for test files; re-read from the configuration before each cluster start. */
  short replication = 3;
  /** Root directory under which every name, edits and checkpoint directory of the tests lives. */
  private File base_dir = new File(
      System.getProperty("test.build.data", "build/test/data"), "dfs/");

  /**
   * Removes a base directory left over from an earlier run, if there is one.
   */
  @Override
  protected void setUp() throws java.lang.Exception {
    if (base_dir.exists()) {
      tearDown();
    }
  }

  /**
   * Deletes the base directory.
   *
   * @throws java.lang.Exception an {@link IOException} if the directory
   *         exists but could not be removed
   */
  @Override
  protected void tearDown() throws java.lang.Exception {
    // Only a failed deletion of an existing directory is an error; a test
    // that never got as far as creating the directory has nothing to clean.
    if (base_dir.exists() && !FileUtil.fullyDelete(base_dir)) {
      throw new IOException("Cannot remove directory " + base_dir);
    }
  }

  /**
   * Creates (overwriting) the file {@code name} and fills it with
   * {@link #FILE_SIZE} pseudo-random bytes generated from {@link #SEED}.
   *
   * @param fileSys file system to create the file on
   * @param name    path of the file
   * @param repl    replication factor of the file
   * @throws IOException if the file cannot be created or written
   */
  private void writeFile(FileSystem fileSys, Path name, int repl)
      throws IOException {
    byte[] buffer = new byte[FILE_SIZE];
    new Random(SEED).nextBytes(buffer);
    FSDataOutputStream stm = fileSys.create(name, true,
        fileSys.getConf().getInt("io.file.buffer.size", 4096),
        (short) repl, (long) BLOCK_SIZE);
    try {
      stm.write(buffer);
    } finally {
      // Close even when the write fails so the stream is not leaked.
      stm.close();
    }
  }

  /**
   * Asserts whether the image file and the edits file exist under {@code dir}.
   *
   * @param dir            directory that {@link #FILE_IMAGE} and
   *                       {@link #FILE_EDITS} are resolved against
   * @param imageMustExist expected existence of the image file
   * @param editsMustExist expected existence of the edits file
   */
  void checkImageAndEditsFilesExistence(File dir,
                                        boolean imageMustExist,
                                        boolean editsMustExist) {
    File image = new File(dir, FILE_IMAGE);
    File edits = new File(dir, FILE_EDITS);
    assertEquals("existence of " + image, imageMustExist, image.exists());
    assertEquals("existence of " + edits, editsMustExist, edits.exists());
  }

  /**
   * Asserts that {@code name} exists, has replication {@code repl} and is
   * {@link #FILE_SIZE} bytes long.
   *
   * @param fileSys file system to inspect
   * @param name    path of the file
   * @param repl    expected replication factor
   * @throws IOException if the file system cannot be queried
   */
  private void checkFile(FileSystem fileSys, Path name, int repl)
      throws IOException {
    assertTrue(fileSys.exists(name));
    int actualReplication = fileSys.getFileStatus(name).getReplication();
    assertEquals("replication for " + name, repl, actualReplication);
    long size = fileSys.getContentSummary(name).getLength();
    assertEquals("file size for " + name, (long) FILE_SIZE, size);
  }

  /**
   * Deletes {@code name}, asserting that it existed before and is gone after.
   *
   * @param fileSys file system to delete from
   * @param name    path of the file
   * @throws IOException if the file system operation fails
   */
  private void cleanupFile(FileSystem fileSys, Path name)
      throws IOException {
    assertTrue(fileSys.exists(name));
    fileSys.delete(name, true);
    assertTrue(!fileSys.exists(name));
  }

  /**
   * Creates a secondary name node from {@code conf}.
   * Note that this sets dfs.secondary.http.address to "0.0.0.0:0" on the
   * passed configuration.
   *
   * @param conf configuration for the secondary name node (modified)
   * @return the new secondary name node
   * @throws IOException if the secondary name node cannot be created
   */
  SecondaryNameNode startSecondaryNameNode(Configuration conf
                                          ) throws IOException {
    conf.set("dfs.secondary.http.address", "0.0.0.0:0");
    return new SecondaryNameNode(conf);
  }

  /**
   * Creates a mini cluster with {@link #NUM_DATA_NODES} data nodes. All
   * constructor arguments other than {@code conf} and {@code format} are the
   * fixed values every cluster start in this class uses.
   *
   * @param conf   cluster configuration
   * @param format value of the constructor's fourth argument; the tests pass
   *               {@code true} only for the very first start and
   *               {@code false} ("do not format") for every restart
   * @return the new cluster
   * @throws IOException if the cluster cannot be started
   */
  private MiniDFSCluster startCluster(Configuration conf, boolean format)
      throws IOException {
    return new MiniDFSCluster(0, conf, NUM_DATA_NODES, format, false, true,
        null, null, null, null);
  }

  /**
   * Closes the file system, then shuts down the cluster, then the secondary
   * name node. Each step runs even if an earlier one throws, and
   * {@code null} arguments are skipped.
   *
   * @param fileSys   file system to close, may be {@code null}
   * @param cluster   cluster to shut down, may be {@code null}
   * @param secondary secondary name node to shut down, may be {@code null}
   * @throws IOException if closing the file system fails
   */
  private void shutdownAll(FileSystem fileSys, MiniDFSCluster cluster,
                           SecondaryNameNode secondary) throws IOException {
    try {
      if (fileSys != null) {
        fileSys.close();
      }
    } finally {
      try {
        if (cluster != null) {
          cluster.shutdown();
        }
      } finally {
        if (secondary != null) {
          secondary.shutdown();
        }
      }
    }
  }

  /**
   * Asserts that starting a (non-formatting) cluster with {@code conf} fails
   * with an {@link IOException}. If the cluster starts anyway it is shut down
   * again before the test fails.
   *
   * @param conf        configuration that is expected to be rejected
   * @param logMessage  line printed when startup fails as expected
   * @param failMessage assertion message used when startup succeeds
   */
  private void assertClusterStartFails(Configuration conf, String logMessage,
                                       String failMessage) {
    MiniDFSCluster cluster = null;
    try {
      cluster = startCluster(conf, false);
      fail(failMessage);
    } catch (IOException e) { // expect to fail
      System.out.println(logMessage);
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }

  /**
   * Test various configuration options of dfs.name.dir and dfs.name.edits.dir
   * The test creates files and restarts cluster with different configs.
   * 1. Starts cluster with shared name and edits dirs
   * 2. Restarts cluster by adding additional (different) name and edits dirs
   * 3. Restarts cluster by removing shared name and edits dirs by allowing to
   *    start using separate name and edits dirs
   * 4. Restart cluster by adding shared directory again, but make sure we
   *    do not read any stale image or edits.
   * All along the test, we create and delete files at each restart to make
   * sure we are reading proper edits and image.
   */
  public void testNameEditsConfigs() throws IOException {
    Path file1 = new Path("TestNameEditsConfigs1");
    Path file2 = new Path("TestNameEditsConfigs2");
    Path file3 = new Path("TestNameEditsConfigs3");
    MiniDFSCluster cluster = null;
    SecondaryNameNode secondary = null;
    Configuration conf = null;
    FileSystem fileSys = null;
    File newNameDir = new File(base_dir, "name");
    File newEditsDir = new File(base_dir, "edits");
    File nameAndEdits = new File(base_dir, "name_and_edits");
    File checkpointNameDir = new File(base_dir, "secondname");
    File checkpointEditsDir = new File(base_dir, "secondedits");
    File checkpointNameAndEdits = new File(base_dir, "second_name_and_edits");

    // Start namenode with same dfs.name.dir and dfs.name.edits.dir
    conf = new Configuration();
    conf.set("dfs.name.dir", nameAndEdits.getPath());
    conf.set("dfs.name.edits.dir", nameAndEdits.getPath());
    conf.set("fs.checkpoint.dir", checkpointNameAndEdits.getPath());
    conf.set("fs.checkpoint.edits.dir", checkpointNameAndEdits.getPath());
    replication = (short) conf.getInt("dfs.replication", 3);
    // Manage our own dfs directories
    cluster = startCluster(conf, true);
    try {
      cluster.waitActive();
      secondary = startSecondaryNameNode(conf);
      fileSys = cluster.getFileSystem();

      assertTrue(!fileSys.exists(file1));
      writeFile(fileSys, file1, replication);
      checkFile(fileSys, file1, replication);
      secondary.doCheckpoint();
    } finally {
      shutdownAll(fileSys, cluster, secondary);
    }

    // Start namenode with additional dfs.name.dir and dfs.name.edits.dir
    conf = new Configuration();
    assertTrue(newNameDir.mkdir());
    assertTrue(newEditsDir.mkdir());

    conf.set("dfs.name.dir", nameAndEdits.getPath() +
             "," + newNameDir.getPath());
    conf.set("dfs.name.edits.dir", nameAndEdits.getPath() +
             "," + newEditsDir.getPath());
    conf.set("fs.checkpoint.dir", checkpointNameDir.getPath() +
             "," + checkpointNameAndEdits.getPath());
    conf.set("fs.checkpoint.edits.dir", checkpointEditsDir.getPath() +
             "," + checkpointNameAndEdits.getPath());
    replication = (short) conf.getInt("dfs.replication", 3);
    // Reset so that a failed startup does not shut down the previous phase's
    // already-closed handles again.
    fileSys = null;
    secondary = null;
    // Manage our own dfs directories. Do not format.
    cluster = startCluster(conf, false);
    try {
      cluster.waitActive();
      secondary = startSecondaryNameNode(conf);
      fileSys = cluster.getFileSystem();

      assertTrue(fileSys.exists(file1));
      checkFile(fileSys, file1, replication);
      cleanupFile(fileSys, file1);
      writeFile(fileSys, file2, replication);
      checkFile(fileSys, file2, replication);
      secondary.doCheckpoint();
    } finally {
      shutdownAll(fileSys, cluster, secondary);
    }

    checkImageAndEditsFilesExistence(nameAndEdits, true, true);
    checkImageAndEditsFilesExistence(newNameDir, true, false);
    checkImageAndEditsFilesExistence(newEditsDir, false, true);
    checkImageAndEditsFilesExistence(checkpointNameAndEdits, true, true);
    checkImageAndEditsFilesExistence(checkpointNameDir, true, false);
    checkImageAndEditsFilesExistence(checkpointEditsDir, false, true);

    // Now remove common directory both have and start namenode with
    // separate name and edits dirs.
    // The shared directories' edits files are moved into the name dirs and
    // their image files into the edits dirs; the outcome of the renames is
    // not checked here.
    new File(nameAndEdits, FILE_EDITS).renameTo(
        new File(newNameDir, FILE_EDITS));
    new File(nameAndEdits, FILE_IMAGE).renameTo(
        new File(newEditsDir, FILE_IMAGE));
    new File(checkpointNameAndEdits, FILE_EDITS).renameTo(
        new File(checkpointNameDir, FILE_EDITS));
    new File(checkpointNameAndEdits, FILE_IMAGE).renameTo(
        new File(checkpointEditsDir, FILE_IMAGE));
    conf = new Configuration();
    conf.set("dfs.name.dir", newNameDir.getPath());
    conf.set("dfs.name.edits.dir", newEditsDir.getPath());
    conf.set("fs.checkpoint.dir", checkpointNameDir.getPath());
    conf.set("fs.checkpoint.edits.dir", checkpointEditsDir.getPath());
    replication = (short) conf.getInt("dfs.replication", 3);
    fileSys = null;
    secondary = null;
    cluster = startCluster(conf, false);
    try {
      cluster.waitActive();
      secondary = startSecondaryNameNode(conf);
      fileSys = cluster.getFileSystem();

      assertTrue(!fileSys.exists(file1));
      assertTrue(fileSys.exists(file2));
      checkFile(fileSys, file2, replication);
      cleanupFile(fileSys, file2);
      writeFile(fileSys, file3, replication);
      checkFile(fileSys, file3, replication);
      secondary.doCheckpoint();
    } finally {
      shutdownAll(fileSys, cluster, secondary);
    }

    checkImageAndEditsFilesExistence(newNameDir, true, false);
    checkImageAndEditsFilesExistence(newEditsDir, false, true);
    checkImageAndEditsFilesExistence(checkpointNameDir, true, false);
    checkImageAndEditsFilesExistence(checkpointEditsDir, false, true);

    // Add old name_and_edits dir. File system should not read image or edits
    // from old dir
    assertTrue(FileUtil.fullyDelete(new File(nameAndEdits, "current")));
    assertTrue(FileUtil.fullyDelete(new File(checkpointNameAndEdits, "current")));
    conf = new Configuration();
    conf.set("dfs.name.dir", nameAndEdits.getPath() +
             "," + newNameDir.getPath());
    conf.set("dfs.name.edits.dir", nameAndEdits.getPath() +
             "," + newEditsDir.getPath());
    conf.set("fs.checkpoint.dir", checkpointNameDir.getPath() +
             "," + checkpointNameAndEdits.getPath());
    conf.set("fs.checkpoint.edits.dir", checkpointEditsDir.getPath() +
             "," + checkpointNameAndEdits.getPath());
    replication = (short) conf.getInt("dfs.replication", 3);
    fileSys = null;
    secondary = null;
    cluster = startCluster(conf, false);
    try {
      cluster.waitActive();
      secondary = startSecondaryNameNode(conf);
      fileSys = cluster.getFileSystem();

      assertTrue(!fileSys.exists(file1));
      assertTrue(!fileSys.exists(file2));
      assertTrue(fileSys.exists(file3));
      checkFile(fileSys, file3, replication);
      secondary.doCheckpoint();
    } finally {
      shutdownAll(fileSys, cluster, secondary);
    }
    checkImageAndEditsFilesExistence(nameAndEdits, true, true);
    checkImageAndEditsFilesExistence(checkpointNameAndEdits, true, true);
  }

  /**
   * Test various configuration options of dfs.name.dir and dfs.name.edits.dir
   * This test tries to simulate failure scenarios.
   * 1. Start cluster with shared name and edits dir
   * 2. Restart cluster by adding separate name and edits dirs
   * 3. Restart cluster by removing shared name and edits dir
   * 4. Restart cluster with old shared name and edits dir, but only latest
   *    name dir. This should fail since we don't have latest edits dir
   * 5. Restart cluster with old shared name and edits dir, but only latest
   *    edits dir. This should fail since we don't have latest name dir
   */
  public void testNameEditsConfigsFailure() throws IOException {
    Path file1 = new Path("TestNameEditsConfigs1");
    Path file2 = new Path("TestNameEditsConfigs2");
    Path file3 = new Path("TestNameEditsConfigs3");
    MiniDFSCluster cluster = null;
    Configuration conf = null;
    FileSystem fileSys = null;
    File newNameDir = new File(base_dir, "name");
    File newEditsDir = new File(base_dir, "edits");
    File nameAndEdits = new File(base_dir, "name_and_edits");

    // Start namenode with same dfs.name.dir and dfs.name.edits.dir
    conf = new Configuration();
    conf.set("dfs.name.dir", nameAndEdits.getPath());
    conf.set("dfs.name.edits.dir", nameAndEdits.getPath());
    replication = (short) conf.getInt("dfs.replication", 3);
    // Manage our own dfs directories
    cluster = startCluster(conf, true);
    try {
      cluster.waitActive();
      fileSys = cluster.getFileSystem();

      assertTrue(!fileSys.exists(file1));
      writeFile(fileSys, file1, replication);
      checkFile(fileSys, file1, replication);
    } finally {
      shutdownAll(fileSys, cluster, null);
    }

    // Start namenode with additional dfs.name.dir and dfs.name.edits.dir
    conf = new Configuration();
    assertTrue(newNameDir.mkdir());
    assertTrue(newEditsDir.mkdir());

    conf.set("dfs.name.dir", nameAndEdits.getPath() +
             "," + newNameDir.getPath());
    conf.set("dfs.name.edits.dir", nameAndEdits.getPath() +
             "," + newEditsDir.getPath());
    replication = (short) conf.getInt("dfs.replication", 3);
    fileSys = null;
    // Manage our own dfs directories. Do not format.
    cluster = startCluster(conf, false);
    try {
      cluster.waitActive();
      fileSys = cluster.getFileSystem();

      assertTrue(fileSys.exists(file1));
      checkFile(fileSys, file1, replication);
      cleanupFile(fileSys, file1);
      writeFile(fileSys, file2, replication);
      checkFile(fileSys, file2, replication);
    } finally {
      shutdownAll(fileSys, cluster, null);
    }

    // Now remove common directory both have and start namenode with
    // separate name and edits dirs
    conf = new Configuration();
    conf.set("dfs.name.dir", newNameDir.getPath());
    conf.set("dfs.name.edits.dir", newEditsDir.getPath());
    replication = (short) conf.getInt("dfs.replication", 3);
    fileSys = null;
    cluster = startCluster(conf, false);
    try {
      cluster.waitActive();
      fileSys = cluster.getFileSystem();

      assertTrue(!fileSys.exists(file1));
      assertTrue(fileSys.exists(file2));
      checkFile(fileSys, file2, replication);
      cleanupFile(fileSys, file2);
      writeFile(fileSys, file3, replication);
      checkFile(fileSys, file3, replication);
    } finally {
      shutdownAll(fileSys, cluster, null);
    }

    // Add old shared directory for name and edits along with latest name
    conf = new Configuration();
    conf.set("dfs.name.dir", newNameDir.getPath() + "," +
             nameAndEdits.getPath());
    conf.set("dfs.name.edits.dir", nameAndEdits.getPath());
    replication = (short) conf.getInt("dfs.replication", 3);
    assertClusterStartFails(conf,
        "cluster start failed due to missing latest edits dir",
        "cluster unexpectedly started without the latest edits dir");

    // Add old shared directory for name and edits along with latest edits
    conf = new Configuration();
    conf.set("dfs.name.dir", nameAndEdits.getPath());
    conf.set("dfs.name.edits.dir", newEditsDir.getPath() +
             "," + nameAndEdits.getPath());
    replication = (short) conf.getInt("dfs.replication", 3);
    assertClusterStartFails(conf,
        "cluster start failed due to missing latest name dir",
        "cluster unexpectedly started without the latest name dir");
  }
}