1 /*
2 * Copyright 2018, 2020 Jonathan Dieter <jdieter@gmail.com>
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
18 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #define _GNU_SOURCE
28
29 #include <assert.h>
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <stdint.h>
34 #include <stdbool.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/wait.h>
38 #include <fcntl.h>
39 #include <libgen.h>
40 #include <dirent.h>
41 #include <unistd.h>
42 #include <argp.h>
43 #include <zck.h>
44
45 #if defined(stdout)
46 #undef stdout
47 #endif
48
49 #include "util_common.h"
50
/* One-line program description shown at the top of --help output. */
static char doc[] =
    "zck_gen_zdict - Generate a zdict for a zchunk file";

/* Description of the single positional argument. */
static char args_doc[] =
    "<file>";

/* Command-line options understood by parse_opt(), terminated by a zeroed
 * entry. */
static struct argp_option options[] = {
    {
        .name = "verbose",
        .key  = 'v',
        .doc  = "Increase verbosity (can be specified more than once for debugging)",
    },
    /*{"stdout", 'c', 0, 0, "Direct output to stdout"},*/
    {
        .name = "dir",
        .key  = 'd',
        .arg  = "DIRECTORY",
        .doc  = "Write individual chunks to DIRECTORY (defaults to temporary directory)",
    },
    {
        .name = "version",
        .key  = 'V',
        .doc  = "Show program version",
    },
    { 0 }
};
64
65 struct arguments {
66 char *args[1];
67 char *dir;
68 zck_log_type log_level;
69 bool stdout;
70 bool exit;
71 };
72
parse_opt(int key,char * arg,struct argp_state * state)73 static error_t parse_opt (int key, char *arg, struct argp_state *state) {
74 struct arguments *arguments = state->input;
75
76 if(arguments->exit)
77 return 0;
78
79 switch (key) {
80 case 'v':
81 arguments->log_level--;
82 if(arguments->log_level < ZCK_LOG_DDEBUG)
83 arguments->log_level = ZCK_LOG_DDEBUG;
84 break;
85 /*case 'c':
86 arguments->stdout = true;
87 break;*/
88 case 'd':
89 arguments->dir = arg;
90 break;
91 case 'V':
92 version();
93 arguments->exit = true;
94 break;
95 case ARGP_KEY_ARG:
96 if (state->arg_num >= 1) {
97 argp_usage (state);
98 return EINVAL;
99 }
100 arguments->args[state->arg_num] = arg;
101
102 break;
103
104 case ARGP_KEY_END:
105 if (state->arg_num < 1) {
106 argp_usage (state);
107 return EINVAL;
108 }
109 break;
110
111 default:
112 return ARGP_ERR_UNKNOWN;
113 }
114 return 0;
115 }
116
117 static struct argp argp = {options, parse_opt, args_doc, doc};
118
/*
 * Return the directory that individual chunk files should be written to.
 *
 * When old_dir is non-NULL, a heap-allocated copy of it is returned and
 * nothing is created on disk.  When old_dir is NULL, a uniquely named
 * "zcktempXXXXXX" directory is created with mkdtemp() below $TMPDIR (or
 * below /tmp when TMPDIR is unset or empty) and its path is returned.
 *
 * The caller owns the returned string and must free() it.  NULL is returned,
 * after a message has been printed to stderr, when TMPDIR is longer than
 * 1024 bytes or when the temporary directory cannot be created.
 */
char *get_tmp_dir(char *old_dir) {
    if(old_dir != NULL) {
        size_t size = strlen(old_dir) + 1;
        char *copy = malloc(size);
        assert(copy);
        memcpy(copy, old_dir, size);
        return copy;
    }

    static const char template[] = "zcktempXXXXXX";
    const char *tmpdir = getenv("TMPDIR");

    /* An empty TMPDIR would otherwise place the directory in "/" */
    if(tmpdir == NULL || tmpdir[0] == '\0') {
        tmpdir = "/tmp";
    } else if(strlen(tmpdir) > 1024) {
        fprintf(stderr, "TMPDIR environmental variable is > 1024 bytes\n");
        return NULL;
    }

    /* "<tmpdir>" + '/' + "<template>" + terminating NUL */
    size_t size = strlen(tmpdir) + 1 + strlen(template) + 1;
    char *base_dir = calloc(size, 1);
    assert(base_dir);
    snprintf(base_dir, size, "%s/%s", tmpdir, template);

    /* mkdtemp() fills in the XXXXXX in place and returns its argument */
    if(mkdtemp(base_dir) == NULL) {
        /* Report before free(), which is allowed to modify errno */
        perror("ERROR: ");
        free(base_dir);
        return NULL;
    }
    return base_dir;
}
159
main(int argc,char * argv[])160 int main (int argc, char *argv[]) {
161 struct arguments arguments = {0};
162
163 /* Defaults */
164 arguments.log_level = ZCK_LOG_ERROR;
165
166 int retval = argp_parse (&argp, argc, argv, 0, 0, &arguments);
167 if(retval || arguments.exit)
168 exit(retval);
169
170 zck_set_log_level(arguments.log_level);
171
172 int src_fd = open(arguments.args[0], O_RDONLY);
173 if(src_fd < 0) {
174 dprintf(STDERR_FILENO, "Unable to open %s\n", arguments.args[0]);
175 perror("");
176 exit(1);
177 }
178 char *base_name = basename(arguments.args[0]);
179 // len .zck -> .zdict = +2 + \0 = +3
180 char *out_name = calloc(strlen(base_name) + 3, 1);
181 assert(out_name);
182 snprintf(out_name, strlen(base_name) - 3, "%s", base_name); //Strip off .zck
183
184 char *dir = get_tmp_dir(arguments.dir);
185 if(dir == NULL) {
186 free(out_name);
187 exit(1);
188 }
189 bool good_exit = false;
190
191 char *data = NULL;
192 zckCtx *zck = zck_create();
193 if(!zck_init_read(zck, src_fd)) {
194 dprintf(STDERR_FILENO, "%s", zck_get_error(zck));
195 goto error2;
196 }
197
198 int ret = zck_validate_data_checksum(zck);
199 if(ret < 1) {
200 if(ret == -1)
201 dprintf(STDERR_FILENO, "Data checksum failed verification\n");
202 goto error2;
203 }
204
205 for(zckChunk *idx=zck_get_first_chunk(zck); idx!=NULL;
206 idx=zck_get_next_chunk(idx)) {
207 // Skip dictionary
208 if(idx == zck_get_first_chunk(zck))
209 continue;
210 ssize_t chunk_size = zck_get_chunk_size(idx);
211 if(chunk_size < 0) {
212 dprintf(STDERR_FILENO, "%s", zck_get_error(zck));
213 goto error2;
214 }
215 data = calloc(chunk_size, 1);
216 assert(data);
217 ssize_t read_size = zck_get_chunk_data(idx, data, chunk_size);
218 if(read_size != chunk_size) {
219 if(read_size < 0)
220 dprintf(STDERR_FILENO, "%s", zck_get_error(zck));
221 else
222 dprintf(STDERR_FILENO,
223 "Chunk %li size doesn't match expected size: %li != %li\n",
224 zck_get_chunk_number(idx), read_size, chunk_size);
225 goto error2;
226 }
227
228 char *dict_block = calloc(strlen(dir) + strlen(out_name) + 12, 1);
229 assert(dict_block);
230 snprintf(dict_block, strlen(dir) + strlen(out_name) + 12, "%s/%s.%li",
231 dir, out_name, zck_get_chunk_number(idx));
232 int dst_fd = open(dict_block, O_TRUNC | O_WRONLY | O_CREAT, 0666);
233 if(dst_fd < 0) {
234 dprintf(STDERR_FILENO, "Unable to open %s", dict_block);
235 perror("");
236 free(dict_block);
237 goto error2;
238 }
239 if(write(dst_fd, data, chunk_size) != chunk_size) {
240 dprintf(STDERR_FILENO, "Error writing to %s\n", dict_block);
241 free(dict_block);
242 goto error2;
243 }
244 free(data);
245 close(dst_fd);
246 free(dict_block);
247 }
248 snprintf(out_name + strlen(base_name) - 4, 7, ".zdict");
249
250 if(!zck_close(zck)) {
251 dprintf(STDERR_FILENO, "%s", zck_get_error(zck));
252 goto error2;
253 }
254
255 /* Create dictionary */
256 int pid = fork();
257 if(pid == 0) {
258 execl("/usr/bin/zstd", "zstd", "--train", dir, "-r", "-o", out_name, NULL);
259 dprintf(STDERR_FILENO, "Unable to find /usr/bin/zstd\n");
260 exit(1);
261 }
262 int wstatus = 0;
263 int w = waitpid(pid, &wstatus, 0);
264 if (w == -1) {
265 dprintf(STDERR_FILENO, "Error waiting for zstd\n");
266 perror("");
267 goto error2;
268 }
269 if(WEXITSTATUS(wstatus) != 0) {
270 dprintf(STDERR_FILENO, "Error generating dict\n");
271 goto error2;
272 }
273
274 /* Clean up temporary directory */
275 if(!arguments.dir) {
276 struct dirent *dp;
277 DIR *dfd;
278
279 if ((dfd = opendir(dir)) == NULL) {
280 dprintf(STDERR_FILENO, "Unable to read %s\n", dir);
281 goto error2;
282 }
283
284 bool err = false;
285 while((dp = readdir(dfd)) != NULL) {
286 if(dp->d_name[0] == '.')
287 continue;
288 char *full_path = calloc(strlen(dir) + strlen(dp->d_name) + 2, 1);
289 snprintf(full_path, strlen(dir) + strlen(dp->d_name) + 2, "%s/%s",
290 dir, dp->d_name);
291 if(unlink(full_path) != 0) {
292 dprintf(STDERR_FILENO, "Unable to remove %s\n", full_path);
293 perror("");
294 err = true;
295 } else {
296 if(arguments.log_level <= ZCK_LOG_INFO)
297 dprintf(STDERR_FILENO, "Removed %s\n", full_path);
298 }
299 free(full_path);
300 }
301 closedir(dfd);
302 if(!err) {
303 if(rmdir(dir) != 0) {
304 dprintf(STDERR_FILENO, "Unable to remove %s\n", dir);
305 perror("");
306 }
307 } else {
308 dprintf(STDERR_FILENO, "Errors encountered, not removing %s\n",
309 dir);
310 }
311 }
312 good_exit = true;
313 error2:
314 free(dir);
315 zck_free(&zck);
316 if(!good_exit)
317 unlink(out_name);
318 free(out_name);
319 close(src_fd);
320 if(!good_exit)
321 exit(1);
322 exit(0);
323 }
324