1 /* -*- indent-tabs-mode: nil -*-
2  *
3  * Copyright 2012-2013 Kubo Takehiro <kubo@jiubao.org>
4  *
5  * Redistribution and use in source and binary forms, with or without modification, are
6  * permitted provided that the following conditions are met:
7  *
8  *    1. Redistributions of source code must retain the above copyright notice, this list of
9  *       conditions and the following disclaimer.
10  *
11  *    2. Redistributions in binary form must reproduce the above copyright notice, this list
12  *       of conditions and the following disclaimer in the documentation and/or other materials
13  *       provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS OR IMPLIED
16  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
22  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
23  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  *
25  * The views and conclusions contained in the software and documentation are those of the
26  * authors and should not be interpreted as representing official policies, either expressed
27  * or implied, of the authors.
28  *
29  */
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33 
34 #include <stdlib.h>
35 #include <string.h>
36 #include <errno.h>
37 #include <snappy-c.h>
38 #include "snzip.h"
39 #include "crc32.h"
40 
/* Chunk type bytes used by this file for payload-carrying chunks. */
#define COMPRESSED_DATA_IDENTIFIER 0x00
#define UNCOMPRESSED_DATA_IDENTIFIER 0x01

/*
 * 4.1. Stream identifier (0xff)
 *
 * Chunk type 0xff, 3-byte little-endian length 6, then the ASCII bytes "sNaPpY".
 * Stored as unsigned char so that the 0xff byte is representable regardless of
 * whether plain char is signed on the target.
 */
static const unsigned char stream_header[10] = {0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59};

#define MAX_DATA_LEN 16777215 /* maximum chunk data length (largest value of the 3-byte length field) */
#define MAX_UNCOMPRESSED_DATA_LEN 65536 /* maximum uncompressed data length excluding checksum */
49 
framing_format_compress(FILE * infp,FILE * outfp,size_t block_size)50 static int framing_format_compress(FILE *infp, FILE *outfp, size_t block_size)
51 {
52   const size_t max_uncompressed_data_len = MAX_UNCOMPRESSED_DATA_LEN;
53   const size_t max_compressed_data_len = snappy_max_compressed_length(max_uncompressed_data_len);
54   size_t uncompressed_data_len;
55   size_t compressed_data_len;
56   char *uncompressed_data = malloc(max_uncompressed_data_len);
57   char *compressed_data = malloc(max_compressed_data_len);
58   int err = 1;
59 
60   if (uncompressed_data == NULL || compressed_data == NULL) {
61     print_error("out of memory\n");
62     goto cleanup;
63   }
64 
65   /* write the steam header */
66   fwrite(stream_header, sizeof(stream_header), 1, outfp);
67 
68   /* write file body */
69   while ((uncompressed_data_len = fread(uncompressed_data, 1, max_uncompressed_data_len, infp)) > 0) {
70     unsigned int crc32c = masked_crc32c(uncompressed_data, uncompressed_data_len);
71     char type_code;
72     size_t write_len;
73     const char *write_data;
74 
75     /* compress the block. */
76     compressed_data_len = max_compressed_data_len;
77     snappy_compress(uncompressed_data, uncompressed_data_len, compressed_data, &compressed_data_len);
78 
79     if (compressed_data_len >= (uncompressed_data_len - (uncompressed_data_len / 8))) {
80       /* uncompressed data */
81       type_code = UNCOMPRESSED_DATA_IDENTIFIER;
82       write_len = uncompressed_data_len;
83       write_data = uncompressed_data;
84     } else {
85       /* compressed data */
86       type_code = COMPRESSED_DATA_IDENTIFIER;
87       write_len = compressed_data_len;
88       write_data = compressed_data;
89     }
90 
91     /* write block type */
92     putc(type_code, outfp);
93     /* write data length */
94     putc(((write_len + 4) >> 0), outfp);
95     putc(((write_len + 4) >> 8), outfp);
96     putc(((write_len + 4) >> 16), outfp);
97     /* write checksum */
98     putc((crc32c >>  0), outfp);
99     putc((crc32c >>  8), outfp);
100     putc((crc32c >> 16), outfp);
101     putc((crc32c >> 24), outfp);
102     /* write data */
103     if (fwrite(write_data, write_len, 1, outfp) != 1) {
104       print_error("Failed to write a file: %s\n", strerror(errno));
105       goto cleanup;
106     }
107   }
108   /* check stream errors */
109   if (ferror(infp)) {
110     print_error("Failed to read a file: %s\n", strerror(errno));
111     goto cleanup;
112   }
113   if (ferror(outfp)) {
114     print_error("Failed to write a file: %s\n", strerror(errno));
115     goto cleanup;
116   }
117   err = 0;
118  cleanup:
119   free(uncompressed_data);
120   free(compressed_data);
121   return err;
122 }
123 
/*
 * Read exactly buflen bytes from fp into buf.
 *
 * Returns 0 when all buflen bytes were read (trivially so when buflen is 0).
 * On a short read, reports either an unexpected end of file or a read error
 * via print_error and returns -1.
 */
static int read_data(char *buf, size_t buflen, FILE *fp)
{
  /*
   * Count bytes rather than one buflen-sized item: fread() returns 0 when the
   * item size is 0, which would make a zero-length read look like a failure.
   */
  if (fread(buf, 1, buflen, fp) != buflen) {
    if (feof(fp)) {
      print_error("Unexpected end of file\n");
    } else {
      print_error("Failed to read a file: %s\n", strerror(errno));
    }
    return -1;
  }
  return 0;
}
136 
/*
 * Verify that the masked CRC32C of data[0 .. datalen) matches the 4-byte
 * little-endian checksum stored at `checksum`.
 *
 * The stored value is assembled byte by byte, so `checksum` may have any
 * alignment and is never accessed through an incompatible pointer type.
 *
 * Returns 0 when the checksums match. On mismatch, reports both values via
 * print_error and returns -1.
 */
static int check_crc32c(const char *data, size_t datalen, const char *checksum)
{
  const unsigned char *stored = (const unsigned char *)checksum;
  unsigned int expected_crc32c = (unsigned int)stored[0]
                               | ((unsigned int)stored[1] << 8)
                               | ((unsigned int)stored[2] << 16)
                               | ((unsigned int)stored[3] << 24);
  unsigned int actual_crc32c = masked_crc32c(data, datalen);

  if (actual_crc32c != expected_crc32c) {
    print_error("CRC32C error! (expected 0x%08x but 0x%08x)\n", expected_crc32c, actual_crc32c);
    return -1;
  }
  return 0;
}
151 
framing_format_uncompress(FILE * infp,FILE * outfp,int skip_magic)152 static int framing_format_uncompress(FILE *infp, FILE *outfp, int skip_magic)
153 {
154   const size_t max_data_len = MAX_DATA_LEN;
155   const size_t max_uncompressed_data_len = MAX_UNCOMPRESSED_DATA_LEN;
156   size_t data_len;
157   size_t uncompressed_data_len;
158   char *data = malloc(max_data_len);
159   char *uncompressed_data = malloc(max_uncompressed_data_len);
160   int err = 1;
161 
162   if (data == NULL || uncompressed_data == NULL) {
163     print_error("out of memory\n");
164     goto cleanup;
165   }
166 
167   if (!skip_magic) {
168     /* read the steam header */
169     if (read_data(data, sizeof(stream_header), infp) != 0) {
170       goto cleanup;
171     }
172     if (memcmp(data, stream_header, sizeof(stream_header)) != 0) {
173       print_error("Invalid stream identfier\n");
174       goto cleanup;
175     }
176   }
177 
178   for (;;) {
179     int id = getc(infp);
180     if (id == EOF) {
181       break;
182     }
183     data_len = getc(infp);
184     data_len |= getc(infp) << 8;
185     data_len |= getc(infp) << 16;
186     if (data_len == (size_t)EOF) {
187       print_error("Unexpected end of file\n");
188       goto cleanup;
189     }
190     if (id == COMPRESSED_DATA_IDENTIFIER) {
191       /* 4.2. Compressed data (chunk type 0x00) */
192       if (data_len < 4) {
193         print_error("too short data length %lu\n", data_len);
194         goto cleanup;
195       }
196       if (read_data(data, data_len, infp) != 0) {
197         goto cleanup;
198       }
199       uncompressed_data_len = max_uncompressed_data_len;
200       if (snappy_uncompress(data + 4, data_len - 4, uncompressed_data, &uncompressed_data_len)) {
201         print_error("Invalid data: snappy_uncompress failed\n");
202         goto cleanup;
203       }
204       if (check_crc32c(uncompressed_data, uncompressed_data_len, data) != 0) {
205         goto cleanup;
206       }
207       if (fwrite(uncompressed_data, uncompressed_data_len, 1, outfp) != 1) {
208         break;
209       }
210     } else if (id == UNCOMPRESSED_DATA_IDENTIFIER) {
211       /* 4.3. Uncompressed data (chunk type 0x01) */
212       if (data_len < 4) {
213         print_error("too short data length %lu\n", data_len);
214         goto cleanup;
215       }
216       if (read_data(data, data_len, infp) != 0) {
217         goto cleanup;
218       }
219       if (check_crc32c(data + 4, data_len - 4, data) != 0) {
220         goto cleanup;
221       }
222       if (fwrite(data + 4, data_len - 4, 1, outfp) != 1) {
223         break;
224       }
225     } else if (id < 0x80) {
226       /* 4.4. Reserved unskippable chunks (chunk types 0x02-0x7f) */
227       print_error("Unsupported identifier 0x%02x\n", id);
228       goto cleanup;
229     } else {
230       /* 4.5. Reserved skippable chunks (chunk types 0x80-0xfe) */
231       while (data_len-- > 0) {
232         if (getc(infp) == EOF) {
233           print_error("Unexpected end of file\n");
234           goto cleanup;
235         }
236       }
237     }
238   }
239   /* check stream errors */
240   if (ferror(infp)) {
241     print_error("Failed to read a file: %s\n", strerror(errno));
242     goto cleanup;
243   }
244   if (ferror(outfp)) {
245     print_error("Failed to write a file: %s\n", strerror(errno));
246     goto cleanup;
247   }
248   err = 0;
249  cleanup:
250   free(data);
251   free(uncompressed_data);
252   return err;
253 }
254 
/*
 * Descriptor that exposes this file's compress/uncompress pair under the
 * name "framing2".
 *
 * NOTE(review): stream_format_t is declared outside this file, so the meaning
 * of each positional field below is inferred from its value; confirm against
 * the struct declaration.
 */
stream_format_t framing2_format = {
  "framing2", /* presumably the format name */
  "https://github.com/google/snappy/blob/master/framing_format.txt", /* presumably the URL of the format description */
  "sz", /* presumably the file name suffix */
  framing_format_compress, /* compression entry point defined in this file */
  framing_format_uncompress, /* decompression entry point defined in this file */
};
262