1 /** 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 package org.apache.hadoop.util; 19 20 import java.io.FileNotFoundException; 21 import java.io.FileOutputStream; 22 import java.io.PrintStream; 23 import java.util.ArrayList; 24 import java.util.List; 25 import java.util.Properties; 26 import java.util.Random; 27 import java.util.zip.CRC32; 28 import java.util.zip.Checksum; 29 import org.junit.Assert; 30 import org.junit.Test; 31 32 /** 33 * Unit test to verify that the pure-Java CRC32 algorithm gives 34 * the same results as the built-in implementation. 35 */ 36 public class TestPureJavaCrc32 { 37 private final CRC32 theirs = new CRC32(); 38 private final PureJavaCrc32 ours = new PureJavaCrc32(); 39 40 @Test testCorrectness()41 public void testCorrectness() throws Exception { 42 checkSame(); 43 44 theirs.update(104); 45 ours.update(104); 46 checkSame(); 47 48 checkOnBytes(new byte[] {40, 60, 97, -70}, false); 49 50 checkOnBytes("hello world!".getBytes("UTF-8"), false); 51 52 for (int i = 0; i < 10000; i++) { 53 byte randomBytes[] = new byte[new Random().nextInt(2048)]; 54 new Random().nextBytes(randomBytes); 55 checkOnBytes(randomBytes, false); 56 } 57 58 } 59 checkOnBytes(byte[] bytes, boolean print)60 private void checkOnBytes(byte[] bytes, boolean print) { 61 theirs.reset(); 62 ours.reset(); 63 checkSame(); 64 65 for (int i = 0; i < bytes.length; i++) { 66 ours.update(bytes[i]); 67 theirs.update(bytes[i]); 68 checkSame(); 69 } 70 71 if (print) { 72 System.out.println("theirs:\t" + Long.toHexString(theirs.getValue()) 73 + "\nours:\t" + Long.toHexString(ours.getValue())); 74 } 75 76 theirs.reset(); 77 ours.reset(); 78 79 ours.update(bytes, 0, bytes.length); 80 theirs.update(bytes, 0, bytes.length); 81 if (print) { 82 System.out.println("theirs:\t" + Long.toHexString(theirs.getValue()) 83 + "\nours:\t" + Long.toHexString(ours.getValue())); 84 } 85 86 checkSame(); 87 88 if (bytes.length >= 10) { 89 ours.update(bytes, 5, 5); 90 theirs.update(bytes, 5, 5); 91 checkSame(); 92 } 93 } 94 checkSame()95 private void checkSame() { 96 Assert.assertEquals(theirs.getValue(), ours.getValue()); 97 } 98 99 /** 100 * Generate a table to perform checksums based on the same CRC-32 polynomial 101 * that java.util.zip.CRC32 uses. 102 */ 103 public static class Table { 104 private static final int polynomial = 0xEDB88320; 105 106 private final int[][] tables; 107 Table(final int nBits, final int nTables)108 private Table(final int nBits, final int nTables) { 109 tables = new int[nTables][]; 110 final int size = 1 << nBits; 111 for(int i = 0; i < tables.length; i++) { 112 tables[i] = new int[size]; 113 } 114 115 //compute the first table 116 final int[] first = tables[0]; 117 for (int i = 0; i < first.length; i++) { 118 int crc = i; 119 for (int j = 0; j < nBits; j++) { 120 if ((crc & 1) == 1) { 121 crc >>>= 1; 122 crc ^= polynomial; 123 } else { 124 crc >>>= 1; 125 } 126 } 127 first[i] = crc; 128 } 129 130 //compute the remaining tables 131 final int mask = first.length - 1; 132 for(int j = 1; j < tables.length; j++) { 133 final int[] previous = tables[j-1]; 134 final int[] current = tables[j]; 135 for (int i = 0; i < current.length; i++) { 136 current[i] = (previous[i] >>> nBits) ^ first[previous[i] & mask]; 137 } 138 } 139 } 140 toStrings(String nameformat)141 String[] toStrings(String nameformat) { 142 final String[] s = new String[tables.length]; 143 for (int j = 0; j < tables.length; j++) { 144 final int[] t = tables[j]; 145 final StringBuilder b = new StringBuilder(); 146 b.append(String.format(" /* "+ nameformat +" */", j)); 147 for (int i = 0; i < t.length;) { 148 b.append("\n "); 149 for(int k = 0; k < 4; k++) { 150 b.append(String.format("0x%08X, ", t[i++])); 151 } 152 } 153 s[j] = b.toString(); 154 } 155 return s; 156 } 157 158 /** {@inheritDoc} */ toString()159 public String toString() { 160 final StringBuilder b = new StringBuilder(); 161 162 final String tableFormat = String.format("T%d_", 163 Integer.numberOfTrailingZeros(tables[0].length)) + "%d"; 164 final String startFormat = " private static final int "+tableFormat+"_start = %d*256;"; 165 166 for (int j = 0; j < tables.length; j++) { 167 b.append(String.format(startFormat, j, j)); 168 b.append("\n"); 169 } 170 171 b.append(" private static final int[] T = new int[] {"); 172 for(String s : toStrings(tableFormat)) { 173 b.append("\n"); 174 b.append(s); 175 } 176 b.setCharAt(b.length() - 2, '\n'); 177 b.append(" };\n"); 178 return b.toString(); 179 } 180 181 /** Generate CRC-32 lookup tables */ main(String[] args)182 public static void main(String[] args) throws FileNotFoundException { 183 int i = 8; 184 final PrintStream out = new PrintStream( 185 new FileOutputStream("table" + i + ".txt"), true); 186 final Table t = new Table(i, 16); 187 final String s = t.toString(); 188 System.out.println(s); 189 out.println(s); 190 } 191 } 192 193 /** 194 * Performance tests to compare performance of the Pure Java implementation 195 * to the built-in java.util.zip implementation. This can be run from the 196 * command line with: 197 * 198 * java -cp path/to/test/classes:path/to/common/classes \ 199 * 'org.apache.hadoop.util.TestPureJavaCrc32$PerformanceTest' 200 * 201 * The output is in JIRA table format. 202 */ 203 public static class PerformanceTest { 204 public static final int MAX_LEN = 32*1024*1024; // up to 32MB chunks 205 public static final int BYTES_PER_SIZE = MAX_LEN * 4; 206 207 static final Checksum zip = new CRC32(); 208 static final Checksum[] CRCS = {new PureJavaCrc32()}; 209 main(String args[])210 public static void main(String args[]) { 211 printSystemProperties(System.out); 212 doBench(CRCS, System.out); 213 } 214 printCell(String s, int width, PrintStream out)215 private static void printCell(String s, int width, PrintStream out) { 216 final int w = s.length() > width? s.length(): width; 217 out.printf(" %" + w + "s |", s); 218 } 219 doBench(final Checksum[] crcs, final PrintStream out)220 private static void doBench(final Checksum[] crcs, final PrintStream out) { 221 final ArrayList<Checksum> a = new ArrayList<Checksum>(); 222 a.add(zip); 223 for (Checksum c : crcs) 224 if(c.getClass() != zip.getClass()) 225 a.add(c); 226 doBench(a, out); 227 } 228 doBench(final List<Checksum> crcs, final PrintStream out )229 private static void doBench(final List<Checksum> crcs, final PrintStream out 230 ) { 231 final byte[] bytes = new byte[MAX_LEN]; 232 new Random().nextBytes(bytes); 233 234 // Print header 235 out.printf("\nPerformance Table (The unit is MB/sec)\n||"); 236 final String title = "Num Bytes"; 237 printCell("Num Bytes", 0, out); 238 for (Checksum c : crcs) { 239 out.printf("|"); 240 printCell(c.getClass().getSimpleName(), 8, out); 241 } 242 out.printf("|\n"); 243 244 // Warm up implementations to get jit going. 245 for (Checksum c : crcs) { 246 doBench(c, bytes, 2, null); 247 doBench(c, bytes, 2101, null); 248 } 249 250 // Test on a variety of sizes 251 for (int size = 1; size < MAX_LEN; size *= 2) { 252 out.printf("|"); 253 printCell(String.valueOf(size), title.length()+1, out); 254 255 Long expected = null; 256 for(Checksum c : crcs) { 257 System.gc(); 258 final long result = doBench(c, bytes, size, out); 259 if(c.getClass() == zip.getClass()) { 260 expected = result; 261 } else if (result != expected) { 262 throw new RuntimeException(c.getClass() + " has bugs!"); 263 } 264 265 } 266 out.printf("\n"); 267 } 268 } 269 doBench(Checksum crc, byte[] bytes, int size, PrintStream out)270 private static long doBench(Checksum crc, byte[] bytes, int size, 271 PrintStream out) { 272 final String name = crc.getClass().getSimpleName(); 273 final int trials = BYTES_PER_SIZE / size; 274 275 final long st = System.nanoTime(); 276 crc.reset(); 277 for (int i = 0; i < trials; i++) { 278 crc.update(bytes, 0, size); 279 } 280 final long result = crc.getValue(); 281 final long et = System.nanoTime(); 282 283 double mbProcessed = trials * size / 1024.0 / 1024.0; 284 double secsElapsed = (et - st) / 1000000000.0d; 285 if (out != null) { 286 final String s = String.format("%9.3f", mbProcessed/secsElapsed); 287 printCell(s, name.length()+1, out); 288 } 289 return result; 290 } 291 printSystemProperties(PrintStream out)292 private static void printSystemProperties(PrintStream out) { 293 final String[] names = { 294 "java.version", 295 "java.runtime.name", 296 "java.runtime.version", 297 "java.vm.version", 298 "java.vm.vendor", 299 "java.vm.name", 300 "java.vm.specification.version", 301 "java.specification.version", 302 "os.arch", 303 "os.name", 304 "os.version" 305 }; 306 final Properties p = System.getProperties(); 307 for(String n : names) { 308 out.println(n + " = " + p.getProperty(n)); 309 } 310 } 311 } 312 } 313