1 /** 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 package org.apache.hadoop.mapred; 20 21 import java.io.*; 22 import junit.framework.TestCase; 23 24 import org.apache.hadoop.fs.*; 25 import org.apache.hadoop.io.*; 26 27 import org.apache.hadoop.mapred.lib.*; 28 29 public class TestMultipleTextOutputFormat extends TestCase { 30 private static JobConf defaultConf = new JobConf(); 31 32 private static FileSystem localFs = null; 33 static { 34 try { 35 localFs = FileSystem.getLocal(defaultConf); 36 } catch (IOException e) { 37 throw new RuntimeException("init failure", e); 38 } 39 } 40 41 // A random task attempt id for testing. 42 private static String attempt = "attempt_200707121733_0001_m_000000_0"; 43 44 private static Path workDir = 45 new Path(new Path( 46 new Path(System.getProperty("test.build.data", "."), 47 "data"), 48 FileOutputCommitter.TEMP_DIR_NAME), "_" + attempt); 49 writeData(RecordWriter<Text, Text> rw)50 private static void writeData(RecordWriter<Text, Text> rw) throws IOException { 51 for (int i = 10; i < 40; i++) { 52 String k = "" + i; 53 String v = "" + i; 54 rw.write(new Text(k), new Text(v)); 55 } 56 } 57 58 static class KeyBasedMultipleTextOutputFormat extends MultipleTextOutputFormat<Text, Text> { generateFileNameForKeyValue(Text key, Text v, String name)59 protected String generateFileNameForKeyValue(Text key, Text v, String name) { 60 61 return key.toString().substring(0, 1) + "-" + name; 62 } 63 } 64 test1(JobConf job)65 private static void test1(JobConf job) throws IOException { 66 FileSystem fs = FileSystem.getLocal(job); 67 String name = "part-00000"; 68 KeyBasedMultipleTextOutputFormat theOutputFormat = new KeyBasedMultipleTextOutputFormat(); 69 RecordWriter<Text, Text> rw = theOutputFormat.getRecordWriter(fs, job, name, null); 70 writeData(rw); 71 rw.close(null); 72 } 73 test2(JobConf job)74 private static void test2(JobConf job) throws IOException { 75 FileSystem fs = FileSystem.getLocal(job); 76 String name = "part-00000"; 77 //pretend that we have input file with 1/2/3 as the suffix 78 job.set(JobContext.MAP_INPUT_FILE, "1/2/3"); 79 // we use the last two legs of the input file as the output file 80 job.set("mapred.outputformat.numOfTrailingLegs", "2"); 81 MultipleTextOutputFormat<Text, Text> theOutputFormat = new MultipleTextOutputFormat<Text, Text>(); 82 RecordWriter<Text, Text> rw = theOutputFormat.getRecordWriter(fs, job, name, null); 83 writeData(rw); 84 rw.close(null); 85 } 86 testFormat()87 public void testFormat() throws Exception { 88 JobConf job = new JobConf(); 89 job.set(JobContext.TASK_ATTEMPT_ID, attempt); 90 FileOutputFormat.setOutputPath(job, workDir.getParent().getParent()); 91 FileOutputFormat.setWorkOutputPath(job, workDir); 92 FileSystem fs = workDir.getFileSystem(job); 93 if (!fs.mkdirs(workDir)) { 94 fail("Failed to create output directory"); 95 } 96 //System.out.printf("workdir: %s\n", workDir.toString()); 97 TestMultipleTextOutputFormat.test1(job); 98 TestMultipleTextOutputFormat.test2(job); 99 String file_11 = "1-part-00000"; 100 101 File expectedFile_11 = new File(new Path(workDir, file_11).toString()); 102 103 //System.out.printf("expectedFile_11: %s\n", new Path(workDir, file_11).toString()); 104 StringBuffer expectedOutput = new StringBuffer(); 105 for (int i = 10; i < 20; i++) { 106 expectedOutput.append(""+i).append('\t').append(""+i).append("\n"); 107 } 108 String output = UtilsForTests.slurp(expectedFile_11); 109 //System.out.printf("File_2 output: %s\n", output); 110 assertEquals(output, expectedOutput.toString()); 111 112 String file_12 = "2-part-00000"; 113 114 File expectedFile_12 = new File(new Path(workDir, file_12).toString()); 115 //System.out.printf("expectedFile_12: %s\n", new Path(workDir, file_12).toString()); 116 expectedOutput = new StringBuffer(); 117 for (int i = 20; i < 30; i++) { 118 expectedOutput.append(""+i).append('\t').append(""+i).append("\n"); 119 } 120 output = UtilsForTests.slurp(expectedFile_12); 121 //System.out.printf("File_2 output: %s\n", output); 122 assertEquals(output, expectedOutput.toString()); 123 124 String file_13 = "3-part-00000"; 125 126 File expectedFile_13 = new File(new Path(workDir, file_13).toString()); 127 //System.out.printf("expectedFile_13: %s\n", new Path(workDir, file_13).toString()); 128 expectedOutput = new StringBuffer(); 129 for (int i = 30; i < 40; i++) { 130 expectedOutput.append(""+i).append('\t').append(""+i).append("\n"); 131 } 132 output = UtilsForTests.slurp(expectedFile_13); 133 //System.out.printf("File_2 output: %s\n", output); 134 assertEquals(output, expectedOutput.toString()); 135 136 String file_2 = "2/3"; 137 138 File expectedFile_2 = new File(new Path(workDir, file_2).toString()); 139 //System.out.printf("expectedFile_2: %s\n", new Path(workDir, file_2).toString()); 140 expectedOutput = new StringBuffer(); 141 for (int i = 10; i < 40; i++) { 142 expectedOutput.append(""+i).append('\t').append(""+i).append("\n"); 143 } 144 output = UtilsForTests.slurp(expectedFile_2); 145 //System.out.printf("File_2 output: %s\n", output); 146 assertEquals(output, expectedOutput.toString()); 147 } 148 main(String[] args)149 public static void main(String[] args) throws Exception { 150 new TestMultipleTextOutputFormat().testFormat(); 151 } 152 } 153