1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 2005-2013 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *               Glenn Fowler <glenn.s.fowler@gmail.com>                *
18 *                                                                      *
19 ***********************************************************************/
20 #pragma prototyped
21 
22 /*
23  * sort file io vcodex discipline
24  */
25 
/*
 * option description string handed to optstr() by rs_disc() to parse
 * the --library=vcodex option list; it also carries the plugin's
 * PLUGIN, DESCRIPTION and SEE ALSO help sections
 */

static const char usage[] =
"[-1lp0s5P?\n@(#)$Id: vcodex (AT&T Research) 2012-11-27 $\n]"
USAGE_LICENSE
"[+PLUGIN?vcodex - sort io vcodex discipline library]"
"[+DESCRIPTION?The \bvcodex\b \bsort\b(1) discipline encodes and/or "
    "decodes input, output and temporary file data. By default temporary and "
    "output encoding is the same as the encoding used on the first encoded "
    "input file. Output encoding is only applied to the standard output or "
    "to files with a path suffix containing 'z'. If encoding is applied to "
    "a regular output file and the output file path does not have a suffix "
    "containing 'z' and the input path has a suffix containing 'z' then the "
    "output path is renamed by appending the input path suffix.]"
"[i:input?Decode the input files using \amethod\a. \b--noinput\b "
    "disables input decoding.]:[method]"
"[o:output?Encode the output file using \amethod\a. \b--nooutput\b "
    "disables output encoding.]:[method]"
"[r:regress?Massage \bverbose\b output for regression testing.]"
"[t:temporary?Encode temporary intermediate files using "
    "\amethod\a. \b--notemporary\b disables temporary encoding.]:[method]"
"[T:test?Enable test code defined by \amask\a. Test code is "
    "implementation specific. Consult the source for details.]#[mask]"
"[v:verbose?Enable file and stream encoding messages on the standard "
    "error.]"
"[+SEE ALSO?\bsort\b(1), \bvczip\b(1), \bvcodex\b(3)]"
"\n\n--library=vcodex[,option[=value]...]\n\n"
;
52 
53 #include <ast.h>
54 #include <error.h>
55 #include <ls.h>
56 #include <recsort.h>
57 #include <vcsfio.h>
58 
59 struct Delay_s;
60 typedef struct Delay_s Delay_t;
61 
62 struct Delay_s
63 {
64 	Delay_t*	next;
65 	Sfio_t*		sp;
66 	char		name[1];
67 };
68 
/*
 * encoding settings for one stream class (input, output or temporary)
 */

struct Encoding_s
{
	char*		trans;		/* vcodex method string, 0 if none	*/
	char		suffix[16];	/* path suffix (from the last '.') of	*/
					/* a decoded input path, "" if none	*/
	int		use;		/* <0 disabled, 0 default, >0 method	*/
					/* explicitly given or detected		*/
};

typedef struct Encoding_s Encoding_t;
75 
76 typedef struct State_s
77 {
78 	Rsdisc_t	disc;
79 	Encoding_t	input;
80 	Encoding_t	output;
81 	Encoding_t	temporary;
82 	Delay_t*	delay;
83 	unsigned long	test;
84 	int		outputs;
85 	int		regress;
86 	int		verbose;
87 } State_t;
88 
/*
 * id used to name temporary stream f in messages: the stream file
 * descriptor, or in --regress mode the next value of a counter so
 * that the output does not depend on descriptor numbers
 * NOTE: each expansion increments the counter in --regress mode
 */

#define tempid(s,f) \
	((s)->regress ? ++(s)->regress : sffileno((Sfio_t*)(f)))

/*
 * 1 if the text from the last '.' in path p contains a 'z' and no '/',
 * 0 otherwise; s is set to the last '.' in p (0 if there is none)
 * p must not be 0
 */

#define ZIPSUFFIX(p,s) \
	(((s) = strrchr((p), '.')) != 0 && strchr((s), 'z') != 0 && strchr((s), '/') == 0)
91 
/*
 * return 1 if output path is a candidate for encoding: either there
 * is no path at all (path==0) or its suffix satisfies ZIPSUFFIX();
 * return 0 otherwise
 */

static int
zipit(const char* path)
{
	char*	suffix;

	if (!path)
		return 1;
	return ZIPSUFFIX(path, suffix) ? 1 : 0;
}
99 
/*
 * error callback installed in Vcsfdata_t.errorf by push():
 * pass mesg to error() at level 2; the "%s" format keeps any '%'
 * in mesg from being interpreted
 */

static void
vcsferror(const char* mesg)
{
	error(2, "%s", mesg);
}
105 
106 static int
push(Sfio_t * sp,Encoding_t * code,const char * trans,int type)107 push(Sfio_t* sp, Encoding_t* code, const char* trans, int type)
108 {
109 	Vcsfdata_t*	vcsf;
110 
111 	if (!trans && !type)
112 		return sfraise(sp, VCSF_DISC, NiL) == VCSF_DISC;
113 	if (!(vcsf = newof(0, Vcsfdata_t, 1, 0)))
114 		return -1;
115 	vcsf->trans = (char*)trans;
116 	vcsf->type = VCSF_FREE;
117 	if (code)
118 		vcsf->type |= VCSF_TRANS;
119 	if (type)
120 		vcsf->errorf = vcsferror;
121 	if (!vcsfio(sp, vcsf, type))
122 	{
123 		type = ((type & VC_OPTIONAL) && !vcsf->type) ? 0 : -1;
124 		free(vcsf);
125 		return type;
126 	}
127 	if (code && (code->trans = vcsf->trans))
128 		code->use = 1;
129 	return type ? 1 : vcsf->type;
130 }
131 
132 static int
encode(State_t * state,Sfio_t * sp,const char * path)133 encode(State_t* state, Sfio_t* sp, const char* path)
134 {
135 	char*		p;
136 	struct stat	st;
137 
138 	if (!push(sp, NiL, NiL, 0))
139 	{
140 		if (!state->output.use)
141 			state->output = state->input;
142 		if (push(sp, NiL, state->output.trans, VC_ENCODE) < 0)
143 		{
144 			error(2, "%s: cannot push vcodex encode discipline (%s)", path, state->output.trans);
145 			return -1;
146 		}
147 		if (!ZIPSUFFIX(path, p) && *state->input.suffix && !stat(path, &st) && S_ISREG(st.st_mode))
148 		{
149 			p = sfprints("%s%s", path, state->input.suffix);
150 			if (rename(path, p))
151 				error(ERROR_SYSTEM|1, "%s: cannot rename to %s", path, p);
152 			else
153 				path = (const char*)p;
154 		}
155 		if (state->verbose)
156 			error(0, "sort vcodex encode %s (%s)", path, state->output.trans);
157 	}
158 	return 0;
159 }
160 
161 static int
vcodex(Rs_t * rs,int op,Void_t * data,Void_t * arg,Rsdisc_t * disc)162 vcodex(Rs_t* rs, int op, Void_t* data, Void_t* arg, Rsdisc_t* disc)
163 {
164 	int		i;
165 	char*		s;
166 	Delay_t*	delay;
167 	State_t*	state = (State_t*)disc;
168 
169 	if (state->test & 0x10)
170 		error(0, "sort vcodex event %s %p %s"
171 			, op == RS_FILE_WRITE ? "RS_FILE_WRITE"
172 			: op == RS_FILE_READ ? "RS_FILE_READ"
173 			: op == RS_TEMP_WRITE ? "RS_TEMP_WRITE"
174 			: op == RS_TEMP_READ ? "RS_TEMP_READ"
175 			: "UNKNOWN"
176 			, data
177 			, arg);
178 	switch (op)
179 	{
180 	case RS_FILE_WRITE:
181 		if ((!state->outputs++ || (Sfio_t*)data == sfstdout || zipit(arg)) && (state->output.use > 0 || !state->output.use && state->input.use > 0))
182 			return encode(state, (Sfio_t*)data, (char*)arg);
183 		if (!state->output.use && zipit(arg) && (arg || (arg = (Void_t*)"(output-stream)")) && (delay = newof(0, Delay_t, 1, strlen(arg))))
184 		{
185 			delay->sp = (Sfio_t*)data;
186 			strcpy(delay->name, arg);
187 			delay->next = state->delay;
188 			state->delay = delay;
189 		}
190 		break;
191 	case RS_FILE_READ:
192 		if (state->input.use >= 0)
193 		{
194 			if ((i = push((Sfio_t*)data, &state->input, NiL, VC_DECODE|VC_OPTIONAL)) < 0)
195 			{
196 				error(2, "%s: cannot push vcodex decode discipline (%s)", arg, state->input.trans);
197 				return -1;
198 			}
199 			else if (i > 0)
200 			{
201 				if (state->verbose)
202 					error(0, "sort vcodex decode %s (%s)", arg, state->input.trans);
203 				if (state->delay)
204 				{
205 					i = 0;
206 					while (delay = state->delay)
207 					{
208 						if (!i && state->input.use > 0 && !sfseek(delay->sp, (Sfoff_t)0, SEEK_CUR))
209 							i = encode(state, delay->sp, delay->name);
210 						state->delay = delay->next;
211 						free(delay);
212 					}
213 					return i;
214 				}
215 				if (!*state->input.suffix && ZIPSUFFIX(arg, s))
216 					strncopy(state->input.suffix, s, sizeof(state->input.suffix));
217 			}
218 		}
219 		break;
220 	case RS_TEMP_WRITE:
221 		if (state->temporary.use > 0 || !state->temporary.use && state->input.use > 0)
222 		{
223 			if (!state->temporary.use)
224 				state->temporary = state->input;
225 			if (push((Sfio_t*)data, NiL, state->temporary.trans, VC_ENCODE) < 0)
226 			{
227 				error(2, "temporary-%d: cannot push vcodex encode discipline (%s)", tempid(state, data), state->temporary.trans);
228 				return -1;
229 			}
230 			if (state->verbose)
231 				error(0, "sort vcodex encode temporary-%d (%s)", tempid(state, data), state->temporary.trans);
232 			return 1;
233 		}
234 		break;
235 	case RS_TEMP_READ:
236 		if (state->temporary.use > 0 || !state->temporary.use && state->input.use > 0)
237 		{
238 			if (!state->temporary.use)
239 				state->temporary = state->input;
240 			if (!sfdisc((Sfio_t*)data, SF_POPDISC) || sfseek((Sfio_t*)data, (Sfoff_t)0, SEEK_SET))
241 			{
242 				error(2, "temporary-%d: cannot rewind temporary data", tempid(state, data));
243 				return -1;
244 			}
245 			if ((i = push((Sfio_t*)data, NiL, NiL, VC_DECODE)) < 0)
246 			{
247 				error(2, "temporary-%d: cannot push vcodex decode discipline", tempid(state, data));
248 				return -1;
249 			}
250 			else if (i > 0 && state->verbose)
251 				error(0, "sort vcodex decode temporary-%d", tempid(state, data));
252 			return 1;
253 		}
254 		break;
255 	default:
256 		return -1;
257 	}
258 	return 0;
259 }
260 
261 Rsdisc_t*
rs_disc(Rskey_t * key,const char * options)262 rs_disc(Rskey_t* key, const char* options)
263 {
264 	State_t*	state;
265 
266 	if (!(state = newof(0, State_t, 1, 0)))
267 		error(ERROR_SYSTEM|3, "out of space");
268 	if (options)
269 	{
270 		for (;;)
271 		{
272 			switch (optstr(options, usage))
273 			{
274 			case 0:
275 				break;
276 			case 'i':
277 				if (!opt_info.arg)
278 					state->input.use = -1;
279 				else if (streq(opt_info.arg, "-"))
280 					state->input.use = 0;
281 				else
282 				{
283 					state->input.trans = opt_info.arg;
284 					state->input.use = 1;
285 				}
286 				continue;
287 			case 'o':
288 				if (!opt_info.arg)
289 					state->output.use = -1;
290 				else if (streq(opt_info.arg, "-"))
291 					state->output.use = 0;
292 				else
293 				{
294 					state->output.trans = opt_info.arg;
295 					state->output.use = 1;
296 				}
297 				continue;
298 			case 'r':
299 				state->regress = 1;
300 				continue;
301 			case 't':
302 				if (!opt_info.arg)
303 					state->temporary.use = -1;
304 				else if (streq(opt_info.arg, "-"))
305 					state->temporary.use = 0;
306 				else
307 				{
308 					state->temporary.trans = opt_info.arg;
309 					state->temporary.use = 1;
310 				}
311 				continue;
312 			case 'v':
313 				state->verbose = 1;
314 				continue;
315 			case 'T':
316 				state->test |= opt_info.num;
317 				continue;
318 			case '?':
319 				error(ERROR_USAGE|4, "%s", opt_info.arg);
320 				goto drop;
321 			case ':':
322 				error(2, "%s", opt_info.arg);
323 				goto drop;
324 			}
325 			break;
326 		}
327 	}
328 	if (state->temporary.use >= 0)
329 		key->type |= RS_TEXT;
330 	state->disc.eventf = vcodex;
331 	state->disc.events = RS_FILE_WRITE|RS_FILE_READ|RS_TEMP_WRITE|RS_TEMP_READ;
332 	return &state->disc;
333  drop:
334 	free(state);
335 	return 0;
336 }
337 
/*
 * NOTE(review): SORTLIB() is not defined in this file -- presumably it
 * comes from recsort.h and emits the plugin identification for the
 * "vcodex" sort library; confirm
 */

SORTLIB(vcodex)
339