1 /*** dsort.c -- sort FILEs or stdin chronologically
2 *
3 * Copyright (C) 2011-2016 Sebastian Freundt
4 *
5 * Author: Sebastian Freundt <freundt@ga-group.nl>
6 *
7 * This file is part of dateutils.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * 3. Neither the name of the author nor the names of any contributors
21 * may be used to endorse or promote products derived from this
22 * software without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
26 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
31 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
32 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
33 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
34 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 *
36 **/
37 #if defined HAVE_CONFIG_H
38 # include "config.h"
39 #endif /* HAVE_CONFIG_H */
40 #include <unistd.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <stdint.h>
44 #include <sys/time.h>
45 #include <sys/wait.h>
46 #include <fcntl.h>
47 #include <time.h>
48
49 #include "dt-core.h"
50 #include "dt-io.h"
51 #include "dt-locale.h"
52 #include "prchunk.h"
53
/* name of this tool; has external linkage, presumably so that shared
 * diagnostics code can prefix messages with it -- TODO confirm */
const char *prog = "dsort";
55
56 struct prln_ctx_s {
57 struct grep_atom_soa_s *ndl;
58 zif_t fromz;
59 int outfd;
60 };
61
/* options forwarded to the sort(1) helper, see spawn_sort() */
struct sort_ctx_s {
	/* when set, sort(1) is run with -r */
	unsigned int revp:1U;
	/* when set, sort(1) is run with -u */
	unsigned int unqp:1U;
};
66
67
/**
 * Write all BSZ bytes of BUF to FD, resuming after short writes.
 * The loop ends once everything has been written or as soon as write(2)
 * reports an error; the error is not propagated to the caller. */
static void
safe_write(int fd, const char *buf, size_t bsz)
{
	size_t done = 0U;

	while (done < bsz) {
		const ssize_t n = write(fd, buf + done, bsz - done);

		if (n < 0) {
			/* give up silently */
			break;
		}
		done += n;
	}
	return;
}
77
78 static void
proc_line(struct prln_ctx_s ctx,char * line,size_t llen)79 proc_line(struct prln_ctx_s ctx, char *line, size_t llen)
80 {
81 struct dt_dt_s d;
82
83 do {
84 char buf[64U];
85 char *sp, *tp;
86 char *bp = buf;
87 const char *const ep = buf + sizeof(buf);
88
89 /* find first occurrence then */
90 d = dt_io_find_strpdt2(
91 line, llen, ctx.ndl, &sp, &tp, ctx.fromz);
92 /* print line, first thing */
93 safe_write(ctx.outfd, line, llen);
94
95 /* extend by separator */
96 *bp++ = '\001';
97 /* check if line matches */
98 if (!dt_unk_p(d)) {
99 /* match! */
100 if (!dt_sandwich_only_t_p(d)) {
101 bp += dt_strfdt(bp, ep - bp, "%F", d);
102 }
103 *bp++ = '\001';
104 if (!dt_sandwich_only_d_p(d)) {
105 bp += dt_strfdt(bp, ep - bp, "%T", d);
106 }
107 } else {
108 /* just two empty fields then, innit? */
109 *bp++ = '\001';
110 }
111 /* finalise the line and print */
112 *bp++ = '\n';
113 safe_write(ctx.outfd, buf, bp - buf);
114 } while (0);
115 return;
116 }
117
118 static int
proc_file(struct prln_ctx_s prln,const char * fn)119 proc_file(struct prln_ctx_s prln, const char *fn)
120 {
121 size_t lno = 0;
122 void *pctx;
123 int fd;
124
125 if (fn == NULL) {
126 /* stdin then innit */
127 fd = STDIN_FILENO;
128 } else if ((fd = open(fn, O_RDONLY)) < 0) {
129 serror("Error: cannot open file `%s'", fn);
130 return -1;
131 }
132
133 /* using the prchunk reader now */
134 if ((pctx = init_prchunk(fd)) == NULL) {
135 serror("Error: cannot read from `%s'", fn ?: "<stdin>");
136 return -1;
137 }
138
139 while (prchunk_fill(pctx) >= 0) {
140 for (char *line; prchunk_haslinep(pctx); lno++) {
141 size_t llen = prchunk_getline(pctx, &line);
142
143 proc_line(prln, line, llen);
144 }
145 }
146 /* get rid of resources */
147 free_prchunk(pctx);
148 close(fd);
149 return 0;
150 }
151
152
153 /* helper children, sort(1) and cut(1) */
154 static pid_t
spawn_sort(int * restrict infd,const int outfd,struct sort_ctx_s sopt)155 spawn_sort(int *restrict infd, const int outfd, struct sort_ctx_s sopt)
156 {
157 static char *cmdline[16U] = {"sort", "-t", "-k2"};
158 pid_t sortp;
159 /* to snarf off traffic from the child */
160 int intfd[2];
161
162 if (pipe(intfd) < 0) {
163 serror("pipe setup to/from sort failed");
164 return -1;
165 }
166
167 switch ((sortp = vfork())) {
168 case -1:
169 /* i am an error */
170 serror("vfork for sort failed");
171 return -1;
172
173 default:
174 /* i am the parent */
175 close(intfd[0]);
176 *infd = intfd[1];
177 /* close outfd here already */
178 close(outfd);
179 return sortp;
180
181 case 0:;
182 char **cp = cmdline + 3U;
183
184 /* i am the child */
185 if (sopt.revp) {
186 *cp++ = "-r";
187 }
188 if (sopt.unqp) {
189 *cp++ = "-u";
190 }
191 *cp++ = NULL;
192
193 /* stdout -> outfd */
194 dup2(outfd, STDOUT_FILENO);
195 /* *infd -> stdin */
196 dup2(intfd[0], STDIN_FILENO);
197 close(intfd[1]);
198
199 execvp("sort", cmdline);
200 serror("execvp(sort) failed");
201 _exit(EXIT_FAILURE);
202 }
203 }
204
/**
 * Spawn cut(1) to strip the sort keys off the records again.
 *
 * The child reads from a freshly created pipe whose write end is stored
 * in *INFD, and prints the first \001-delimited field of every record,
 * i.e. the original line, on the stdout it inherits from us.
 *
 * Return the child's pid on success, or -1 on failure in which case
 * *INFD is left alone. */
static pid_t
spawn_cut(int *restrict infd)
{
	/* \001 is the separator proc_line() puts between a line and its keys */
	static char *const cmdline[] = {"cut", "-d\001", "-f1", NULL};
	pid_t cutp;
	/* to snarf off traffic from the child */
	int intfd[2];

	if (pipe(intfd) < 0) {
		serror("pipe setup to/from cut failed");
		return -1;
	}

	switch ((cutp = vfork())) {
	case -1:
		/* i am an error, report first as close() may clobber errno */
		serror("vfork for cut failed");
		close(intfd[0]);
		close(intfd[1]);
		return -1;

	case 0:
		/* i am the child */
		dup2(intfd[0], STDIN_FILENO);
		close(intfd[1]);

		execvp("cut", cmdline);
		serror("execvp(cut) failed");
		_exit(EXIT_FAILURE);

	default:
		/* i am the parent */
		break;
	}

	close(intfd[0]);
	*infd = intfd[1];
	return cutp;
}
240
241
242 #include "dsort.yucc"
243
244 int
main(int argc,char * argv[])245 main(int argc, char *argv[])
246 {
247 yuck_t argi[1U];
248 char **fmt;
249 size_t nfmt;
250 zif_t fromz = NULL;
251 int rc = 0;
252 struct sort_ctx_s sopt = {0U};
253
254 if (yuck_parse(argi, argc, argv)) {
255 rc = 1;
256 goto out;
257 }
258 /* init and unescape sequences, maybe */
259 fmt = argi->input_format_args;
260 nfmt = argi->input_format_nargs;
261 if (argi->backslash_escapes_flag) {
262 for (size_t i = 0; i < nfmt; i++) {
263 dt_io_unescape(fmt[i]);
264 }
265 }
266
267 if (argi->from_locale_arg) {
268 setilocale(argi->from_locale_arg);
269 }
270 /* try and read the from and to time zones */
271 if (argi->from_zone_arg) {
272 fromz = dt_io_zone(argi->from_zone_arg);
273 }
274 if (argi->base_arg) {
275 struct dt_dt_s base = dt_strpdt(argi->base_arg, NULL, NULL);
276 dt_set_base(base);
277 }
278
279 /* prepare a mini-argi for the sort invocation */
280 if (argi->reverse_flag) {
281 sopt.revp = 1U;
282 }
283 if (argi->unique_flag) {
284 sopt.unqp = 1U;
285 }
286
287 {
288 /* process all files */
289 struct grep_atom_s __nstk[16], *needle = __nstk;
290 size_t nneedle = countof(__nstk);
291 struct grep_atom_soa_s ndlsoa;
292 struct prln_ctx_s prln = {
293 .ndl = &ndlsoa,
294 .fromz = fromz,
295 };
296 pid_t cutp, sortp;
297
298 /* lest we overflow the stack */
299 if (nfmt >= nneedle) {
300 /* round to the nearest 8-multiple */
301 nneedle = (nfmt | 7) + 1;
302 needle = calloc(nneedle, sizeof(*needle));
303 }
304 /* and now build the needles */
305 ndlsoa = build_needle(needle, nneedle, fmt, nfmt);
306
307 /* spawn children */
308 with (int ifd, ofd) {
309 if ((cutp = spawn_cut(&ifd)) < 0) {
310 goto ndl_free;
311 }
312 if ((sortp = spawn_sort(&ofd, ifd, sopt)) < 0) {
313 goto ndl_free;
314 }
315 prln.outfd = ofd;
316 }
317
318 for (size_t i = 0U; i < argi->nargs || i == 0U; i++) {
319 if (proc_file(prln, argi->args[i]) < 0) {
320 rc = 1;
321 }
322 }
323
324 /* indicate we're no longer writing to the sort helper */
325 close(prln.outfd);
326
327 /* wait for sort first */
328 with (int st) {
329 while (waitpid(sortp, &st, 0) != sortp);
330 if (WIFEXITED(st) && WEXITSTATUS(st)) {
331 rc = rc ?: WEXITSTATUS(st);
332 }
333 }
334 /* wait for cut then */
335 with (int st) {
336 while (waitpid(cutp, &st, 0) != cutp);
337 if (WIFEXITED(st) && WEXITSTATUS(st)) {
338 rc = rc ?: WEXITSTATUS(st);
339 }
340 }
341
342 ndl_free:
343 if (needle != __nstk) {
344 free(needle);
345 }
346 }
347
348 dt_io_clear_zones();
349 if (argi->from_locale_arg) {
350 setilocale(NULL);
351 }
352
353 out:
354 yuck_free(argi);
355 return rc;
356 }
357
358 /* dsort.c ends here */
359