1 /* -*- indent-tabs-mode: nil -*-
2 *
3 * Copyright 2012-2013 Kubo Takehiro <kubo@jiubao.org>
4 *
5 * Redistribution and use in source and binary forms, with or without modification, are
6 * permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice, this list of
9 * conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
12 * of conditions and the following disclaimer in the documentation and/or other materials
13 * provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS OR IMPLIED
16 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
22 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
23 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *
25 * The views and conclusions contained in the software and documentation are those of the
26 * authors and should not be interpreted as representing official policies, either expressed
27 * or implied, of the authors.
28 *
29 */
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33
34 #include <stdlib.h>
35 #include <string.h>
36 #include <errno.h>
37 #include <snappy-c.h>
38 #include "snzip.h"
39 #include "crc32.h"
40
/* Chunk type bytes for the two data-carrying chunk kinds. */
#define COMPRESSED_DATA_IDENTIFIER 0x00
#define UNCOMPRESSED_DATA_IDENTIFIER 0x01

/*
 * 4.1. Stream identifier (0xff)
 *
 * Laid out like every other chunk: the type byte 0xff, a 3-byte
 * little-endian length (6), then the six ASCII bytes "sNaPpY".
 * The element type is unsigned char because 0xff is out of range for a
 * plain char on platforms where char is signed.
 */
static const unsigned char stream_header[10] = {0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59};

#define MAX_DATA_LEN 16777215 /* maximum chunk data length (largest 24-bit value) */
#define MAX_UNCOMPRESSED_DATA_LEN 65536 /* maximum uncompressed data length excluding checksum */
49
framing_format_compress(FILE * infp,FILE * outfp,size_t block_size)50 static int framing_format_compress(FILE *infp, FILE *outfp, size_t block_size)
51 {
52 const size_t max_uncompressed_data_len = MAX_UNCOMPRESSED_DATA_LEN;
53 const size_t max_compressed_data_len = snappy_max_compressed_length(max_uncompressed_data_len);
54 size_t uncompressed_data_len;
55 size_t compressed_data_len;
56 char *uncompressed_data = malloc(max_uncompressed_data_len);
57 char *compressed_data = malloc(max_compressed_data_len);
58 int err = 1;
59
60 if (uncompressed_data == NULL || compressed_data == NULL) {
61 print_error("out of memory\n");
62 goto cleanup;
63 }
64
65 /* write the steam header */
66 fwrite(stream_header, sizeof(stream_header), 1, outfp);
67
68 /* write file body */
69 while ((uncompressed_data_len = fread(uncompressed_data, 1, max_uncompressed_data_len, infp)) > 0) {
70 unsigned int crc32c = masked_crc32c(uncompressed_data, uncompressed_data_len);
71 char type_code;
72 size_t write_len;
73 const char *write_data;
74
75 /* compress the block. */
76 compressed_data_len = max_compressed_data_len;
77 snappy_compress(uncompressed_data, uncompressed_data_len, compressed_data, &compressed_data_len);
78
79 if (compressed_data_len >= (uncompressed_data_len - (uncompressed_data_len / 8))) {
80 /* uncompressed data */
81 type_code = UNCOMPRESSED_DATA_IDENTIFIER;
82 write_len = uncompressed_data_len;
83 write_data = uncompressed_data;
84 } else {
85 /* compressed data */
86 type_code = COMPRESSED_DATA_IDENTIFIER;
87 write_len = compressed_data_len;
88 write_data = compressed_data;
89 }
90
91 /* write block type */
92 putc(type_code, outfp);
93 /* write data length */
94 putc(((write_len + 4) >> 0), outfp);
95 putc(((write_len + 4) >> 8), outfp);
96 putc(((write_len + 4) >> 16), outfp);
97 /* write checksum */
98 putc((crc32c >> 0), outfp);
99 putc((crc32c >> 8), outfp);
100 putc((crc32c >> 16), outfp);
101 putc((crc32c >> 24), outfp);
102 /* write data */
103 if (fwrite(write_data, write_len, 1, outfp) != 1) {
104 print_error("Failed to write a file: %s\n", strerror(errno));
105 goto cleanup;
106 }
107 }
108 /* check stream errors */
109 if (ferror(infp)) {
110 print_error("Failed to read a file: %s\n", strerror(errno));
111 goto cleanup;
112 }
113 if (ferror(outfp)) {
114 print_error("Failed to write a file: %s\n", strerror(errno));
115 goto cleanup;
116 }
117 err = 0;
118 cleanup:
119 free(uncompressed_data);
120 free(compressed_data);
121 return err;
122 }
123
/*
 * Read exactly buflen bytes from fp into buf.
 *
 * Returns 0 on success.  On a short read it reports either an unexpected
 * end of file or a read error through print_error() and returns -1.
 *
 * A zero-length request succeeds without touching the stream: fread()
 * returns 0 for it, which would otherwise be mistaken for a failure.
 */
static int read_data(char *buf, size_t buflen, FILE *fp)
{
    if (buflen == 0) {
        return 0;
    }
    if (fread(buf, buflen, 1, fp) != 1) {
        if (feof(fp)) {
            print_error("Unexpected end of file\n");
        } else {
            print_error("Failed to read a file: %s\n", strerror(errno));
        }
        return -1;
    }
    return 0;
}
136
/*
 * Compare masked_crc32c() of data[0..datalen) with the 4-byte little-endian
 * value stored at checksum.
 *
 * The stored value is assembled one byte at a time, so checksum may point
 * to any address; no particular alignment is required.
 *
 * Returns 0 when the values match.  Otherwise reports both values through
 * print_error() and returns -1.
 */
static int check_crc32c(const char *data, size_t datalen, const char *checksum)
{
    const unsigned char *stored = (const unsigned char *)checksum;
    unsigned int actual_crc32c = masked_crc32c(data, datalen);
    unsigned int expected_crc32c = (unsigned int)stored[0]
                                 | ((unsigned int)stored[1] << 8)
                                 | ((unsigned int)stored[2] << 16)
                                 | ((unsigned int)stored[3] << 24);

    if (actual_crc32c != expected_crc32c) {
        print_error("CRC32C error! (expected 0x%08x but 0x%08x)\n", expected_crc32c, actual_crc32c);
        return -1;
    }
    return 0;
}
151
framing_format_uncompress(FILE * infp,FILE * outfp,int skip_magic)152 static int framing_format_uncompress(FILE *infp, FILE *outfp, int skip_magic)
153 {
154 const size_t max_data_len = MAX_DATA_LEN;
155 const size_t max_uncompressed_data_len = MAX_UNCOMPRESSED_DATA_LEN;
156 size_t data_len;
157 size_t uncompressed_data_len;
158 char *data = malloc(max_data_len);
159 char *uncompressed_data = malloc(max_uncompressed_data_len);
160 int err = 1;
161
162 if (data == NULL || uncompressed_data == NULL) {
163 print_error("out of memory\n");
164 goto cleanup;
165 }
166
167 if (!skip_magic) {
168 /* read the steam header */
169 if (read_data(data, sizeof(stream_header), infp) != 0) {
170 goto cleanup;
171 }
172 if (memcmp(data, stream_header, sizeof(stream_header)) != 0) {
173 print_error("Invalid stream identfier\n");
174 goto cleanup;
175 }
176 }
177
178 for (;;) {
179 int id = getc(infp);
180 if (id == EOF) {
181 break;
182 }
183 data_len = getc(infp);
184 data_len |= getc(infp) << 8;
185 data_len |= getc(infp) << 16;
186 if (data_len == (size_t)EOF) {
187 print_error("Unexpected end of file\n");
188 goto cleanup;
189 }
190 if (id == COMPRESSED_DATA_IDENTIFIER) {
191 /* 4.2. Compressed data (chunk type 0x00) */
192 if (data_len < 4) {
193 print_error("too short data length %lu\n", data_len);
194 goto cleanup;
195 }
196 if (read_data(data, data_len, infp) != 0) {
197 goto cleanup;
198 }
199 uncompressed_data_len = max_uncompressed_data_len;
200 if (snappy_uncompress(data + 4, data_len - 4, uncompressed_data, &uncompressed_data_len)) {
201 print_error("Invalid data: snappy_uncompress failed\n");
202 goto cleanup;
203 }
204 if (check_crc32c(uncompressed_data, uncompressed_data_len, data) != 0) {
205 goto cleanup;
206 }
207 if (fwrite(uncompressed_data, uncompressed_data_len, 1, outfp) != 1) {
208 break;
209 }
210 } else if (id == UNCOMPRESSED_DATA_IDENTIFIER) {
211 /* 4.3. Uncompressed data (chunk type 0x01) */
212 if (data_len < 4) {
213 print_error("too short data length %lu\n", data_len);
214 goto cleanup;
215 }
216 if (read_data(data, data_len, infp) != 0) {
217 goto cleanup;
218 }
219 if (check_crc32c(data + 4, data_len - 4, data) != 0) {
220 goto cleanup;
221 }
222 if (fwrite(data + 4, data_len - 4, 1, outfp) != 1) {
223 break;
224 }
225 } else if (id < 0x80) {
226 /* 4.4. Reserved unskippable chunks (chunk types 0x02-0x7f) */
227 print_error("Unsupported identifier 0x%02x\n", id);
228 goto cleanup;
229 } else {
230 /* 4.5. Reserved skippable chunks (chunk types 0x80-0xfe) */
231 while (data_len-- > 0) {
232 if (getc(infp) == EOF) {
233 print_error("Unexpected end of file\n");
234 goto cleanup;
235 }
236 }
237 }
238 }
239 /* check stream errors */
240 if (ferror(infp)) {
241 print_error("Failed to read a file: %s\n", strerror(errno));
242 goto cleanup;
243 }
244 if (ferror(outfp)) {
245 print_error("Failed to write a file: %s\n", strerror(errno));
246 goto cleanup;
247 }
248 err = 0;
249 cleanup:
250 free(data);
251 free(uncompressed_data);
252 return err;
253 }
254
255 stream_format_t framing2_format = {
256 "framing2",
257 "https://github.com/google/snappy/blob/master/framing_format.txt",
258 "sz",
259 framing_format_compress,
260 framing_format_uncompress,
261 };
262