1 /*** dsort.c -- sort FILEs or stdin chronologically
2  *
3  * Copyright (C) 2011-2016 Sebastian Freundt
4  *
5  * Author:  Sebastian Freundt <freundt@ga-group.nl>
6  *
7  * This file is part of dateutils.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  *
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * 3. Neither the name of the author nor the names of any contributors
21  *    may be used to endorse or promote products derived from this
22  *    software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
26  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27  * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
31  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
32  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
33  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
34  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  *
36  **/
37 #if defined HAVE_CONFIG_H
38 # include "config.h"
39 #endif	/* HAVE_CONFIG_H */
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <errno.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <time.h>
48 
49 #include "dt-core.h"
50 #include "dt-io.h"
51 #include "dt-locale.h"
52 #include "prchunk.h"
53 
/* Program name.  It has external linkage and is not referenced anywhere
 * else in this file.
 * NOTE(review): presumably picked up by shared helpers such as serror()
 * to prefix diagnostics -- confirm against their definition. */
const char *prog = "dsort";
55 
/* Per-line processing context, handed by value to proc_file() and
 * from there to proc_line(). */
struct prln_ctx_s {
	/* needles passed to dt_io_find_strpdt2(); main() fills them in
	 * via build_needle() from the input formats */
	struct grep_atom_soa_s *ndl;
	/* zone passed to dt_io_find_strpdt2(); main() obtains it from
	 * dt_io_zone() when a from-zone argument is given, NULL otherwise */
	zif_t fromz;
	/* descriptor proc_line() writes to; main() sets it to the write
	 * end of the pipe returned by spawn_sort() */
	int outfd;
};
61 
/* Options forwarded to the sort helper, see spawn_sort(). */
struct sort_ctx_s {
	/* when set, sort is invoked with -r */
	unsigned int revp:1U;
	/* when set, sort is invoked with -u */
	unsigned int unqp:1U;
};
66 
67 
/**
 * Write all BSZ bytes of BUF to FD, resuming after short writes.
 *
 * write(2) may transfer fewer bytes than requested, so the call is
 * repeated on the unwritten tail until everything has been handed off.
 * A call interrupted by a signal (EINTR) is retried, any other error
 * ends the loop.  Nothing is returned, so failures are not reported to
 * the caller. */
static void
safe_write(int fd, const char *buf, size_t bsz)
{
	size_t tot = 0U;

	while (tot < bsz) {
		const ssize_t nwr = write(fd, buf + tot, bsz - tot);

		if (nwr < 0) {
			if (errno == EINTR) {
				/* interrupted before anything was written,
				 * simply go again */
				continue;
			}
			/* hard error, give up on the remainder */
			break;
		}
		tot += (size_t)nwr;
	}
	return;
}
77 
78 static void
proc_line(struct prln_ctx_s ctx,char * line,size_t llen)79 proc_line(struct prln_ctx_s ctx, char *line, size_t llen)
80 {
81 	struct dt_dt_s d;
82 
83 	do {
84 		char buf[64U];
85 		char *sp, *tp;
86 		char *bp = buf;
87 		const char *const ep = buf + sizeof(buf);
88 
89 		/* find first occurrence then */
90 		d = dt_io_find_strpdt2(
91 			line, llen, ctx.ndl, &sp, &tp, ctx.fromz);
92 		/* print line, first thing */
93 		safe_write(ctx.outfd, line, llen);
94 
95 		/* extend by separator */
96 		*bp++ = '\001';
97 		/* check if line matches */
98 		if (!dt_unk_p(d)) {
99 			/* match! */
100 			if (!dt_sandwich_only_t_p(d)) {
101 				bp += dt_strfdt(bp, ep - bp, "%F", d);
102 			}
103 			*bp++ = '\001';
104 			if (!dt_sandwich_only_d_p(d)) {
105 				bp += dt_strfdt(bp, ep - bp, "%T", d);
106 			}
107 		} else {
108 			/* just two empty fields then, innit? */
109 			*bp++ = '\001';
110 		}
111 		/* finalise the line and print */
112 		*bp++ = '\n';
113 		safe_write(ctx.outfd, buf, bp - buf);
114 	} while (0);
115 	return;
116 }
117 
118 static int
proc_file(struct prln_ctx_s prln,const char * fn)119 proc_file(struct prln_ctx_s prln, const char *fn)
120 {
121 	size_t lno = 0;
122 	void *pctx;
123 	int fd;
124 
125 	if (fn == NULL) {
126 		/* stdin then innit */
127 		fd = STDIN_FILENO;
128 	} else if ((fd = open(fn, O_RDONLY)) < 0) {
129 		serror("Error: cannot open file `%s'", fn);
130 		return -1;
131 	}
132 
133 	/* using the prchunk reader now */
134 	if ((pctx = init_prchunk(fd)) == NULL) {
135 		serror("Error: cannot read from `%s'", fn ?: "<stdin>");
136 		return -1;
137 	}
138 
139 	while (prchunk_fill(pctx) >= 0) {
140 		for (char *line; prchunk_haslinep(pctx); lno++) {
141 			size_t llen = prchunk_getline(pctx, &line);
142 
143 			proc_line(prln, line, llen);
144 		}
145 	}
146 	/* get rid of resources */
147 	free_prchunk(pctx);
148 	close(fd);
149 	return 0;
150 }
151 
152 
153 /* helper children, sort(1) and cut(1) */
154 static pid_t
spawn_sort(int * restrict infd,const int outfd,struct sort_ctx_s sopt)155 spawn_sort(int *restrict infd, const int outfd, struct sort_ctx_s sopt)
156 {
157 	static char *cmdline[16U] = {"sort", "-t", "-k2"};
158 	pid_t sortp;
159 	/* to snarf off traffic from the child */
160 	int intfd[2];
161 
162 	if (pipe(intfd) < 0) {
163 		serror("pipe setup to/from sort failed");
164 		return -1;
165 	}
166 
167 	switch ((sortp = vfork())) {
168 	case -1:
169 		/* i am an error */
170 		serror("vfork for sort failed");
171 		return -1;
172 
173 	default:
174 		/* i am the parent */
175 		close(intfd[0]);
176 		*infd = intfd[1];
177 		/* close outfd here already */
178 		close(outfd);
179 		return sortp;
180 
181 	case 0:;
182 		char **cp = cmdline + 3U;
183 
184 		/* i am the child */
185 		if (sopt.revp) {
186 			*cp++ = "-r";
187 		}
188 		if (sopt.unqp) {
189 			*cp++ = "-u";
190 		}
191 		*cp++ = NULL;
192 
193 		/* stdout -> outfd */
194 		dup2(outfd, STDOUT_FILENO);
195 		/* *infd -> stdin */
196 		dup2(intfd[0], STDIN_FILENO);
197 		close(intfd[1]);
198 
199 		execvp("sort", cmdline);
200 		serror("execvp(sort) failed");
201 		_exit(EXIT_FAILURE);
202 	}
203 }
204 
/**
 * Spawn cut(1) as a helper child.
 *
 * The child reads from a freshly created pipe, keeps only the first
 * \001-separated field of every line (the original input line, as
 * written by proc_line()) and prints it to the inherited stdout.
 *
 * On success the child's pid is returned and the write end of the pipe
 * is stored in *INFD.  On failure -1 is returned and *INFD is
 * untouched. */
static pid_t
spawn_cut(int *restrict infd)
{
	/* The delimiter must be the \001 that proc_line() emits; it is
	 * spelled as an escape so it cannot get lost in editors or tools
	 * that drop control characters. */
	static char *const cmdline[] = {"cut", "-d\001", "-f1", NULL};
	pid_t cutp;
	/* to snarf off traffic from the child */
	int intfd[2];

	if (pipe(intfd) < 0) {
		serror("pipe setup to/from cut failed");
		return -1;
	}

	switch ((cutp = vfork())) {
	case -1:
		/* i am an error */
		serror("vfork for cut failed");
		/* the pipe is of no use to anyone now */
		close(intfd[0]);
		close(intfd[1]);
		return -1;

	default:
		/* i am the parent */
		close(intfd[0]);
		*infd = intfd[1];
		return cutp;

	case 0:
		/* i am the child */
		dup2(intfd[0], STDIN_FILENO);
		close(intfd[1]);

		execvp("cut", cmdline);
		serror("execvp(cut) failed");
		_exit(EXIT_FAILURE);
	}
}
240 
241 
242 #include "dsort.yucc"
243 
244 int
main(int argc,char * argv[])245 main(int argc, char *argv[])
246 {
247 	yuck_t argi[1U];
248 	char **fmt;
249 	size_t nfmt;
250 	zif_t fromz = NULL;
251 	int rc = 0;
252 	struct sort_ctx_s sopt = {0U};
253 
254 	if (yuck_parse(argi, argc, argv)) {
255 		rc = 1;
256 		goto out;
257 	}
258 	/* init and unescape sequences, maybe */
259 	fmt = argi->input_format_args;
260 	nfmt = argi->input_format_nargs;
261 	if (argi->backslash_escapes_flag) {
262 		for (size_t i = 0; i < nfmt; i++) {
263 			dt_io_unescape(fmt[i]);
264 		}
265 	}
266 
267 	if (argi->from_locale_arg) {
268 		setilocale(argi->from_locale_arg);
269 	}
270 	/* try and read the from and to time zones */
271 	if (argi->from_zone_arg) {
272 		fromz = dt_io_zone(argi->from_zone_arg);
273 	}
274 	if (argi->base_arg) {
275 		struct dt_dt_s base = dt_strpdt(argi->base_arg, NULL, NULL);
276 		dt_set_base(base);
277 	}
278 
279 	/* prepare a mini-argi for the sort invocation */
280 	if (argi->reverse_flag) {
281 		sopt.revp = 1U;
282 	}
283 	if (argi->unique_flag) {
284 		sopt.unqp = 1U;
285 	}
286 
287 	{
288 		/* process all files */
289 		struct grep_atom_s __nstk[16], *needle = __nstk;
290 		size_t nneedle = countof(__nstk);
291 		struct grep_atom_soa_s ndlsoa;
292 		struct prln_ctx_s prln = {
293 			.ndl = &ndlsoa,
294 			.fromz = fromz,
295 		};
296 		pid_t cutp, sortp;
297 
298 		/* lest we overflow the stack */
299 		if (nfmt >= nneedle) {
300 			/* round to the nearest 8-multiple */
301 			nneedle = (nfmt | 7) + 1;
302 			needle = calloc(nneedle, sizeof(*needle));
303 		}
304 		/* and now build the needles */
305 		ndlsoa = build_needle(needle, nneedle, fmt, nfmt);
306 
307 		/* spawn children */
308 		with (int ifd, ofd) {
309 			if ((cutp = spawn_cut(&ifd)) < 0) {
310 				goto ndl_free;
311 			}
312 			if ((sortp = spawn_sort(&ofd, ifd, sopt)) < 0) {
313 				goto ndl_free;
314 			}
315 			prln.outfd = ofd;
316 		}
317 
318 		for (size_t i = 0U; i < argi->nargs || i == 0U; i++) {
319 			if (proc_file(prln, argi->args[i]) < 0) {
320 				rc = 1;
321 			}
322 		}
323 
324 		/* indicate we're no longer writing to the sort helper */
325 		close(prln.outfd);
326 
327 		/* wait for sort first */
328 		with (int st) {
329 			while (waitpid(sortp, &st, 0) != sortp);
330 			if (WIFEXITED(st) && WEXITSTATUS(st)) {
331 				rc = rc ?: WEXITSTATUS(st);
332 			}
333 		}
334 		/* wait for cut then */
335 		with (int st) {
336 			while (waitpid(cutp, &st, 0) != cutp);
337 			if (WIFEXITED(st) && WEXITSTATUS(st)) {
338 				rc = rc ?: WEXITSTATUS(st);
339 			}
340 		}
341 
342 	ndl_free:
343 		if (needle != __nstk) {
344 			free(needle);
345 		}
346 	}
347 
348 	dt_io_clear_zones();
349 	if (argi->from_locale_arg) {
350 		setilocale(NULL);
351 	}
352 
353 out:
354 	yuck_free(argi);
355 	return rc;
356 }
357 
358 /* dsort.c ends here */
359