1 /**
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements.  See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership.  The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License.  You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 package org.apache.hadoop.util;
19 
20 import java.io.FileNotFoundException;
21 import java.io.FileOutputStream;
22 import java.io.PrintStream;
23 import java.util.ArrayList;
24 import java.util.List;
25 import java.util.Properties;
26 import java.util.Random;
27 import java.util.zip.CRC32;
28 import java.util.zip.Checksum;
29 import org.junit.Assert;
30 import org.junit.Test;
31 
32 /**
33  * Unit test to verify that the pure-Java CRC32 algorithm gives
34  * the same results as the built-in implementation.
35  */
36 public class TestPureJavaCrc32 {
37   private final CRC32 theirs = new CRC32();
38   private final PureJavaCrc32 ours = new PureJavaCrc32();
39 
40   @Test
testCorrectness()41   public void testCorrectness() throws Exception {
42     checkSame();
43 
44     theirs.update(104);
45     ours.update(104);
46     checkSame();
47 
48     checkOnBytes(new byte[] {40, 60, 97, -70}, false);
49 
50     checkOnBytes("hello world!".getBytes("UTF-8"), false);
51 
52     for (int i = 0; i < 10000; i++) {
53       byte randomBytes[] = new byte[new Random().nextInt(2048)];
54       new Random().nextBytes(randomBytes);
55       checkOnBytes(randomBytes, false);
56     }
57 
58   }
59 
checkOnBytes(byte[] bytes, boolean print)60   private void checkOnBytes(byte[] bytes, boolean print) {
61     theirs.reset();
62     ours.reset();
63     checkSame();
64 
65     for (int i = 0; i < bytes.length; i++) {
66       ours.update(bytes[i]);
67       theirs.update(bytes[i]);
68       checkSame();
69     }
70 
71     if (print) {
72       System.out.println("theirs:\t" + Long.toHexString(theirs.getValue())
73                          + "\nours:\t" + Long.toHexString(ours.getValue()));
74     }
75 
76     theirs.reset();
77     ours.reset();
78 
79     ours.update(bytes, 0, bytes.length);
80     theirs.update(bytes, 0, bytes.length);
81     if (print) {
82       System.out.println("theirs:\t" + Long.toHexString(theirs.getValue())
83                          + "\nours:\t" + Long.toHexString(ours.getValue()));
84     }
85 
86     checkSame();
87 
88     if (bytes.length >= 10) {
89       ours.update(bytes, 5, 5);
90       theirs.update(bytes, 5, 5);
91       checkSame();
92     }
93   }
94 
checkSame()95   private void checkSame() {
96     Assert.assertEquals(theirs.getValue(), ours.getValue());
97   }
98 
99   /**
100    * Generate a table to perform checksums based on the same CRC-32 polynomial
101    * that java.util.zip.CRC32 uses.
102    */
103   public static class Table {
104     private static final int polynomial = 0xEDB88320;
105 
106     private final int[][] tables;
107 
Table(final int nBits, final int nTables)108     private Table(final int nBits, final int nTables) {
109       tables = new int[nTables][];
110       final int size = 1 << nBits;
111       for(int i = 0; i < tables.length; i++) {
112         tables[i] = new int[size];
113       }
114 
115       //compute the first table
116       final int[] first = tables[0];
117       for (int i = 0; i < first.length; i++) {
118         int crc = i;
119         for (int j = 0; j < nBits; j++) {
120           if ((crc & 1) == 1) {
121             crc >>>= 1;
122             crc ^= polynomial;
123           } else {
124             crc >>>= 1;
125           }
126         }
127         first[i] = crc;
128       }
129 
130       //compute the remaining tables
131       final int mask = first.length - 1;
132       for(int j = 1; j < tables.length; j++) {
133         final int[] previous = tables[j-1];
134         final int[] current = tables[j];
135         for (int i = 0; i < current.length; i++) {
136           current[i] = (previous[i] >>> nBits) ^ first[previous[i] & mask];
137         }
138       }
139     }
140 
toStrings(String nameformat)141     String[] toStrings(String nameformat) {
142       final String[] s = new String[tables.length];
143       for (int j = 0; j < tables.length; j++) {
144         final int[] t = tables[j];
145         final StringBuilder b = new StringBuilder();
146         b.append(String.format("    /* "+ nameformat +" */", j));
147         for (int i = 0; i < t.length;) {
148           b.append("\n    ");
149           for(int k = 0; k < 4; k++) {
150             b.append(String.format("0x%08X, ", t[i++]));
151           }
152         }
153         s[j] = b.toString();
154       }
155       return s;
156     }
157 
158     /** {@inheritDoc} */
toString()159     public String toString() {
160       final StringBuilder b = new StringBuilder();
161 
162       final String tableFormat = String.format("T%d_",
163         Integer.numberOfTrailingZeros(tables[0].length)) + "%d";
164       final String startFormat = "  private static final int "+tableFormat+"_start = %d*256;";
165 
166       for (int j = 0; j < tables.length; j++) {
167         b.append(String.format(startFormat, j, j));
168         b.append("\n");
169       }
170 
171       b.append("  private static final int[] T = new int[] {");
172       for(String s : toStrings(tableFormat)) {
173         b.append("\n");
174         b.append(s);
175       }
176       b.setCharAt(b.length() - 2, '\n');
177       b.append(" };\n");
178       return b.toString();
179     }
180 
181     /** Generate CRC-32 lookup tables */
main(String[] args)182     public static void main(String[] args) throws FileNotFoundException {
183       int i = 8;
184       final PrintStream out = new PrintStream(
185           new FileOutputStream("table" + i + ".txt"), true);
186       final Table t = new Table(i, 16);
187       final String s = t.toString();
188       System.out.println(s);
189       out.println(s);
190     }
191   }
192 
193   /**
194    * Performance tests to compare performance of the Pure Java implementation
195    * to the built-in java.util.zip implementation. This can be run from the
196    * command line with:
197    *
198    *   java -cp path/to/test/classes:path/to/common/classes \
199    *      'org.apache.hadoop.util.TestPureJavaCrc32$PerformanceTest'
200    *
201    * The output is in JIRA table format.
202    */
203   public static class PerformanceTest {
204     public static final int MAX_LEN = 32*1024*1024; // up to 32MB chunks
205     public static final int BYTES_PER_SIZE = MAX_LEN * 4;
206 
207     static final Checksum zip = new CRC32();
208     static final Checksum[] CRCS = {new PureJavaCrc32()};
209 
main(String args[])210     public static void main(String args[]) {
211       printSystemProperties(System.out);
212       doBench(CRCS, System.out);
213     }
214 
printCell(String s, int width, PrintStream out)215     private static void printCell(String s, int width, PrintStream out) {
216       final int w = s.length() > width? s.length(): width;
217       out.printf(" %" + w + "s |", s);
218     }
219 
doBench(final Checksum[] crcs, final PrintStream out)220     private static void doBench(final Checksum[] crcs, final PrintStream out) {
221       final ArrayList<Checksum> a = new ArrayList<Checksum>();
222       a.add(zip);
223       for (Checksum c : crcs)
224         if(c.getClass() != zip.getClass())
225           a.add(c);
226       doBench(a, out);
227     }
228 
doBench(final List<Checksum> crcs, final PrintStream out )229     private static void doBench(final List<Checksum> crcs, final PrintStream out
230         ) {
231       final byte[] bytes = new byte[MAX_LEN];
232       new Random().nextBytes(bytes);
233 
234       // Print header
235       out.printf("\nPerformance Table (The unit is MB/sec)\n||");
236       final String title = "Num Bytes";
237       printCell("Num Bytes", 0, out);
238       for (Checksum c : crcs) {
239         out.printf("|");
240         printCell(c.getClass().getSimpleName(), 8, out);
241       }
242       out.printf("|\n");
243 
244       // Warm up implementations to get jit going.
245       for (Checksum c : crcs) {
246         doBench(c, bytes, 2, null);
247         doBench(c, bytes, 2101, null);
248       }
249 
250       // Test on a variety of sizes
251       for (int size = 1; size < MAX_LEN; size *= 2) {
252         out.printf("|");
253         printCell(String.valueOf(size), title.length()+1, out);
254 
255         Long expected = null;
256         for(Checksum c : crcs) {
257           System.gc();
258           final long result = doBench(c, bytes, size, out);
259           if(c.getClass() == zip.getClass()) {
260             expected = result;
261           } else if (result != expected) {
262             throw new RuntimeException(c.getClass() + " has bugs!");
263           }
264 
265         }
266         out.printf("\n");
267       }
268     }
269 
doBench(Checksum crc, byte[] bytes, int size, PrintStream out)270     private static long doBench(Checksum crc, byte[] bytes, int size,
271         PrintStream out) {
272       final String name = crc.getClass().getSimpleName();
273       final int trials = BYTES_PER_SIZE / size;
274 
275       final long st = System.nanoTime();
276       crc.reset();
277       for (int i = 0; i < trials; i++) {
278         crc.update(bytes, 0, size);
279       }
280       final long result = crc.getValue();
281       final long et = System.nanoTime();
282 
283       double mbProcessed = trials * size / 1024.0 / 1024.0;
284       double secsElapsed = (et - st) / 1000000000.0d;
285       if (out != null) {
286         final String s = String.format("%9.3f",  mbProcessed/secsElapsed);
287         printCell(s, name.length()+1, out);
288       }
289       return result;
290     }
291 
printSystemProperties(PrintStream out)292     private static void printSystemProperties(PrintStream out) {
293       final String[] names = {
294           "java.version",
295           "java.runtime.name",
296           "java.runtime.version",
297           "java.vm.version",
298           "java.vm.vendor",
299           "java.vm.name",
300           "java.vm.specification.version",
301           "java.specification.version",
302           "os.arch",
303           "os.name",
304           "os.version"
305       };
306       final Properties p = System.getProperties();
307       for(String n : names) {
308         out.println(n + " = " + p.getProperty(n));
309       }
310     }
311   }
312 }
313