1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 2005-2013 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Eclipse Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.eclipse.org/org/documents/epl-v10.html *
11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <glenn.s.fowler@gmail.com> *
18 * *
19 ***********************************************************************/
20 #pragma prototyped
21
22 /*
23 * sort file io vcodex discipline
24 */
25
/*
 * Option description string handed to optstr() by rs_disc(). It declares
 * the discipline options that rs_disc() switches on (i, o, r, t, T, v)
 * together with their descriptive text. The text is runtime data: do not
 * reflow or edit it casually.
 */
static const char usage[] =
"[-1lp0s5P?\n@(#)$Id: vcodex (AT&T Research) 2012-11-27 $\n]"
USAGE_LICENSE
"[+PLUGIN?vcodex - sort io vcodex discipline library]"
"[+DESCRIPTION?The \bvcodex\b \bsort\b(1) discipline encodes and/or "
"decodes input, output and temporary file data. By default temporary and "
"output encoding is the same as the encoding used on the first encoded "
"input file. Output encoding is only applied to the standard output or "
"to files with a path suffix containing 'z'. If encoding is applied to "
"a regular output file and the output file path does not have a suffix "
"containing 'z' and the input path has a suffix containing 'z' then the "
"output path is renamed by appending the input path suffix.]"
"[i:input?Decode the input files using \amethod\a. \b--noinput\b "
"disables input decoding.]:[method]"
"[o:output?Encode the output file using \amethod\a. \b--nooutput\b "
"disables output encoding.]:[method]"
"[r:regress?Massage \bverbose\b output for regression testing.]"
"[t:temporary?Encode temporary intermediate files using "
"\amethod\a. \b--notemporary\b disables temporary encoding.]:[method]"
"[T:test?Enable test code defined by \amask\a. Test code is "
"implementation specific. Consult the source for details.]#[mask]"
"[v:verbose?Enable file and stream encoding messages on the standard "
"error.]"
"[+SEE ALSO?\bsort\b(1), \bvczip\b(1), \bvcodex\b(3)]"
"\n\n--library=vcodex[,option[=value]...]\n\n"
;
52
53 #include <ast.h>
54 #include <error.h>
55 #include <ls.h>
56 #include <recsort.h>
57 #include <vcsfio.h>
58
59 struct Delay_s;
60 typedef struct Delay_s Delay_t;
61
62 struct Delay_s
63 {
64 Delay_t* next;
65 Sfio_t* sp;
66 char name[1];
67 };
68
/*
 * Encoding selection for one data direction (input, output or temporary).
 *
 * use is a tri-state:
 *	<0	coding disabled for this direction (--noinput etc.)
 *	 0	nothing chosen yet; output and temporary may inherit input
 *	>0	trans names the method to use
 */
struct Encoding_s
{
	char*	trans;		/* vcodex method string				*/
	char	suffix[16];	/* path suffix, starting at the last '.', saved	*/
				/* from an encoded input path; only the input	*/
				/* entry's suffix is consulted in this file	*/
	int	use;		/* tri-state described above			*/
};

typedef struct Encoding_s Encoding_t;
75
76 typedef struct State_s
77 {
78 Rsdisc_t disc;
79 Encoding_t input;
80 Encoding_t output;
81 Encoding_t temporary;
82 Delay_t* delay;
83 unsigned long test;
84 int outputs;
85 int regress;
86 int verbose;
87 } State_t;
88
/*
 * Number used to label a temporary stream in messages. When s->regress is
 * set it is a running counter, pre-incremented on EVERY expansion, so the
 * labels are reproducible; otherwise it is sffileno() of stream f.
 */
#define tempid(s,f) \
	((s)->regress ? ++(s)->regress : sffileno((Sfio_t*)(f)))

/*
 * Non-zero when path p has a last '.' and the text from that '.' onward
 * contains a 'z' but no '/'. As a side effect s is set to point at the
 * last '.' in p, or to 0 when p has none. p must not be a null pointer.
 */
#define ZIPSUFFIX(p,s) \
	(((s) = strrchr((p), '.')) != 0 && strchr((s), 'z') != 0 && strchr((s), '/') == 0)
91
/*
 * Decide whether an output named path is a candidate for encoding.
 * A null path always qualifies; otherwise the path qualifies when
 * ZIPSUFFIX() accepts it. Returns 1 or 0.
 */
static int
zipit(const char* path)
{
	char*	suffix;

	if (!path)
		return 1;
	return ZIPSUFFIX(path, suffix) ? 1 : 0;
}
99
/*
 * Error callback installed by push() in Vcsfdata_t.errorf: forwards mesg
 * verbatim to error() at level 2. The "%s" format keeps any '%' in mesg
 * from being interpreted.
 */
static void
vcsferror(const char* mesg)
{
	error(2, "%s", mesg);
}
105
106 static int
push(Sfio_t * sp,Encoding_t * code,const char * trans,int type)107 push(Sfio_t* sp, Encoding_t* code, const char* trans, int type)
108 {
109 Vcsfdata_t* vcsf;
110
111 if (!trans && !type)
112 return sfraise(sp, VCSF_DISC, NiL) == VCSF_DISC;
113 if (!(vcsf = newof(0, Vcsfdata_t, 1, 0)))
114 return -1;
115 vcsf->trans = (char*)trans;
116 vcsf->type = VCSF_FREE;
117 if (code)
118 vcsf->type |= VCSF_TRANS;
119 if (type)
120 vcsf->errorf = vcsferror;
121 if (!vcsfio(sp, vcsf, type))
122 {
123 type = ((type & VC_OPTIONAL) && !vcsf->type) ? 0 : -1;
124 free(vcsf);
125 return type;
126 }
127 if (code && (code->trans = vcsf->trans))
128 code->use = 1;
129 return type ? 1 : vcsf->type;
130 }
131
132 static int
encode(State_t * state,Sfio_t * sp,const char * path)133 encode(State_t* state, Sfio_t* sp, const char* path)
134 {
135 char* p;
136 struct stat st;
137
138 if (!push(sp, NiL, NiL, 0))
139 {
140 if (!state->output.use)
141 state->output = state->input;
142 if (push(sp, NiL, state->output.trans, VC_ENCODE) < 0)
143 {
144 error(2, "%s: cannot push vcodex encode discipline (%s)", path, state->output.trans);
145 return -1;
146 }
147 if (!ZIPSUFFIX(path, p) && *state->input.suffix && !stat(path, &st) && S_ISREG(st.st_mode))
148 {
149 p = sfprints("%s%s", path, state->input.suffix);
150 if (rename(path, p))
151 error(ERROR_SYSTEM|1, "%s: cannot rename to %s", path, p);
152 else
153 path = (const char*)p;
154 }
155 if (state->verbose)
156 error(0, "sort vcodex encode %s (%s)", path, state->output.trans);
157 }
158 return 0;
159 }
160
161 static int
vcodex(Rs_t * rs,int op,Void_t * data,Void_t * arg,Rsdisc_t * disc)162 vcodex(Rs_t* rs, int op, Void_t* data, Void_t* arg, Rsdisc_t* disc)
163 {
164 int i;
165 char* s;
166 Delay_t* delay;
167 State_t* state = (State_t*)disc;
168
169 if (state->test & 0x10)
170 error(0, "sort vcodex event %s %p %s"
171 , op == RS_FILE_WRITE ? "RS_FILE_WRITE"
172 : op == RS_FILE_READ ? "RS_FILE_READ"
173 : op == RS_TEMP_WRITE ? "RS_TEMP_WRITE"
174 : op == RS_TEMP_READ ? "RS_TEMP_READ"
175 : "UNKNOWN"
176 , data
177 , arg);
178 switch (op)
179 {
180 case RS_FILE_WRITE:
181 if ((!state->outputs++ || (Sfio_t*)data == sfstdout || zipit(arg)) && (state->output.use > 0 || !state->output.use && state->input.use > 0))
182 return encode(state, (Sfio_t*)data, (char*)arg);
183 if (!state->output.use && zipit(arg) && (arg || (arg = (Void_t*)"(output-stream)")) && (delay = newof(0, Delay_t, 1, strlen(arg))))
184 {
185 delay->sp = (Sfio_t*)data;
186 strcpy(delay->name, arg);
187 delay->next = state->delay;
188 state->delay = delay;
189 }
190 break;
191 case RS_FILE_READ:
192 if (state->input.use >= 0)
193 {
194 if ((i = push((Sfio_t*)data, &state->input, NiL, VC_DECODE|VC_OPTIONAL)) < 0)
195 {
196 error(2, "%s: cannot push vcodex decode discipline (%s)", arg, state->input.trans);
197 return -1;
198 }
199 else if (i > 0)
200 {
201 if (state->verbose)
202 error(0, "sort vcodex decode %s (%s)", arg, state->input.trans);
203 if (state->delay)
204 {
205 i = 0;
206 while (delay = state->delay)
207 {
208 if (!i && state->input.use > 0 && !sfseek(delay->sp, (Sfoff_t)0, SEEK_CUR))
209 i = encode(state, delay->sp, delay->name);
210 state->delay = delay->next;
211 free(delay);
212 }
213 return i;
214 }
215 if (!*state->input.suffix && ZIPSUFFIX(arg, s))
216 strncopy(state->input.suffix, s, sizeof(state->input.suffix));
217 }
218 }
219 break;
220 case RS_TEMP_WRITE:
221 if (state->temporary.use > 0 || !state->temporary.use && state->input.use > 0)
222 {
223 if (!state->temporary.use)
224 state->temporary = state->input;
225 if (push((Sfio_t*)data, NiL, state->temporary.trans, VC_ENCODE) < 0)
226 {
227 error(2, "temporary-%d: cannot push vcodex encode discipline (%s)", tempid(state, data), state->temporary.trans);
228 return -1;
229 }
230 if (state->verbose)
231 error(0, "sort vcodex encode temporary-%d (%s)", tempid(state, data), state->temporary.trans);
232 return 1;
233 }
234 break;
235 case RS_TEMP_READ:
236 if (state->temporary.use > 0 || !state->temporary.use && state->input.use > 0)
237 {
238 if (!state->temporary.use)
239 state->temporary = state->input;
240 if (!sfdisc((Sfio_t*)data, SF_POPDISC) || sfseek((Sfio_t*)data, (Sfoff_t)0, SEEK_SET))
241 {
242 error(2, "temporary-%d: cannot rewind temporary data", tempid(state, data));
243 return -1;
244 }
245 if ((i = push((Sfio_t*)data, NiL, NiL, VC_DECODE)) < 0)
246 {
247 error(2, "temporary-%d: cannot push vcodex decode discipline", tempid(state, data));
248 return -1;
249 }
250 else if (i > 0 && state->verbose)
251 error(0, "sort vcodex decode temporary-%d", tempid(state, data));
252 return 1;
253 }
254 break;
255 default:
256 return -1;
257 }
258 return 0;
259 }
260
261 Rsdisc_t*
rs_disc(Rskey_t * key,const char * options)262 rs_disc(Rskey_t* key, const char* options)
263 {
264 State_t* state;
265
266 if (!(state = newof(0, State_t, 1, 0)))
267 error(ERROR_SYSTEM|3, "out of space");
268 if (options)
269 {
270 for (;;)
271 {
272 switch (optstr(options, usage))
273 {
274 case 0:
275 break;
276 case 'i':
277 if (!opt_info.arg)
278 state->input.use = -1;
279 else if (streq(opt_info.arg, "-"))
280 state->input.use = 0;
281 else
282 {
283 state->input.trans = opt_info.arg;
284 state->input.use = 1;
285 }
286 continue;
287 case 'o':
288 if (!opt_info.arg)
289 state->output.use = -1;
290 else if (streq(opt_info.arg, "-"))
291 state->output.use = 0;
292 else
293 {
294 state->output.trans = opt_info.arg;
295 state->output.use = 1;
296 }
297 continue;
298 case 'r':
299 state->regress = 1;
300 continue;
301 case 't':
302 if (!opt_info.arg)
303 state->temporary.use = -1;
304 else if (streq(opt_info.arg, "-"))
305 state->temporary.use = 0;
306 else
307 {
308 state->temporary.trans = opt_info.arg;
309 state->temporary.use = 1;
310 }
311 continue;
312 case 'v':
313 state->verbose = 1;
314 continue;
315 case 'T':
316 state->test |= opt_info.num;
317 continue;
318 case '?':
319 error(ERROR_USAGE|4, "%s", opt_info.arg);
320 goto drop;
321 case ':':
322 error(2, "%s", opt_info.arg);
323 goto drop;
324 }
325 break;
326 }
327 }
328 if (state->temporary.use >= 0)
329 key->type |= RS_TEXT;
330 state->disc.eventf = vcodex;
331 state->disc.events = RS_FILE_WRITE|RS_FILE_READ|RS_TEMP_WRITE|RS_TEMP_READ;
332 return &state->disc;
333 drop:
334 free(state);
335 return 0;
336 }
337
/*
 * SORTLIB is defined outside this file. NOTE(review): presumably it
 * expands to the plugin identification glue that lets sort(1) load this
 * library as "vcodex" -- confirm against the header that defines it.
 */
SORTLIB(vcodex)
339