1 /*******************WARNING*********************
2 
This is a *MODIFIED* version of Geoff Collyer's proof-of-concept NOV
4 implementation.
5 
6 It has been modified to support threading directly from a file handle
7 to a NNTP server without a temporary file.
8 
9 This is not a complete distribution.  We have only distributed enough
10 to support NN's needs.
11 
12 The original version came from world.std.com:/src/news/nov.dist.tar.Z
13 and was dated 11 Aug 1993.
14 
15 In any case, bugs you find here are probably my fault, as I've trimmed
16 a fair bit of unused code.
17 
18 -Peter Wemm  <peter@DIALix.oz.au>
19 */
20 
21 /*
22  * Copyright (c) Geoffrey Collyer 1992, 1993.
23  * All rights reserved.
24  * Written by Geoffrey Collyer.
25  * Thanks to UUNET Communications Services Inc for financial support.
26  *
27  * This software is not subject to any license of the American Telephone
28  * and Telegraph Company, the Regents of the University of California, or
29  * the Free Software Foundation.
30  *
31  * Permission is granted to anyone to use this software for any purpose on
32  * any computer system, and to alter it and redistribute it freely, subject
33  * to the following restrictions:
34  *
35  * 1. The authors are not responsible for the consequences of use of this
36  *    software, no matter how awful, even if they arise from flaws in it.
37  *
38  * 2. The origin of this software must not be misrepresented, either by
39  *    explicit claim or by omission.  Since few users ever read sources,
40  *    credits must appear in the documentation.
41  *
42  * 3. Altered versions must be plainly marked as such, and must not be
43  *    misrepresented as being the original software.  Since few users
44  *    ever read sources, credits must appear in the documentation.
45  *
46  * 4. This notice may not be removed or altered.
47  */
48 
#include "config.h"

#ifdef TEST_SPLIT
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif
50 
/*
 * split - divide a string into fields, like awk split()
 *
 * string   NUL-terminated text to split.  It is modified in place: the
 *          separator that ends each stored field is overwritten with '\0'.
 * fields   receives pointers into string, one per field; the list is
 *          not NULL-terminated.
 * nfields  number of entries available in fields[].  When it is zero or
 *          negative the fields are only counted: nothing is stored in
 *          fields[] and string is left untouched.
 * sep      separator specification:
 *            ""    white space: runs of blanks and tabs separate fields,
 *                  and leading or trailing white space never yields an
 *                  empty field;
 *            "c"   every single occurrence of c ends a field, so empty
 *                  fields are possible;
 *            "ab"  (two or more characters) any run of characters from
 *                  the set separates fields; a leading or trailing run
 *                  yields an empty first or last field.
 *
 * Returns the number of fields found, which may exceed nfields.  When it
 * does, only the first nfields pointers are stored and the last stored
 * field is the unsplit remainder of string, separators included.
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
    char           *p = string;
    char            c;		/* latest character */
    char            sepc = sep[0];
    char            sepc2;
    int             fn;
    char          **fp = fields;
    char           *sepp;
    int             trimtrail;
    int             countonly = 0;	/* caller gave no room in fields[] */
    char           *scratch[1];	/* stands in for fields[] when counting */

    /*
     * The fast paths below store the first field before looking at the
     * remaining capacity, so they need room for at least one pointer.
     * With no room, run them against a private one-entry vector: with a
     * capacity of one they never terminate a field inside string, and
     * the count they return does not depend on the capacity.
     */
    if (nfields <= 0) {
        fp = scratch;
        nfields = 1;
        countonly = 1;
    }

    /* white space */
    if (sepc == '\0') {
        while ((c = *p++) == ' ' || c == '\t')
            continue;
        p--;
        trimtrail = 1;
        sep = " \t";		/* note, code below knows this is 2 long */
        sepc = ' ';
    } else
        trimtrail = 0;
    sepc2 = sep[1];		/* now we can safely pick this up */

    /* catch empties */
    if (*p == '\0')
        return (0);

    /* single separator */
    if (sepc2 == '\0') {
        fn = nfields;		/* counts down the free entries */
        for (;;) {
            *fp++ = p;
            fn--;
            if (fn == 0)
                break;
            while ((c = *p++) != sepc)
                if (c == '\0')
                    return (nfields - fn);
            *(p - 1) = '\0';
        }
        /* we have overflowed the fields vector -- just count them */
        fn = nfields;
        for (;;) {
            while ((c = *p++) != sepc)
                if (c == '\0')
                    return (fn);
            fn++;
        }
        /* not reached */
    }
    /* two separators */
    if (sep[2] == '\0') {
        fn = nfields;		/* counts down the free entries */
        for (;;) {
            *fp++ = p;
            fn--;
            while ((c = *p++) != sepc && c != sepc2)
                if (c == '\0') {
                    /* white space mode: an empty last field is not a field */
                    if (trimtrail && **(fp - 1) == '\0')
                        fn++;
                    return (nfields - fn);
                }
            if (fn == 0)
                break;
            *(p - 1) = '\0';
            while ((c = *p++) == sepc || c == sepc2)
                continue;
            p--;
        }
        /* we have overflowed the fields vector -- just count them */
        fn = nfields;
        while (c != '\0') {
            while ((c = *p++) == sepc || c == sepc2)
                continue;
            p--;
            fn++;
            while ((c = *p++) != '\0' && c != sepc && c != sepc2)
                continue;
        }
        /* might have to trim trailing white space */
        if (trimtrail) {
            p--;
            while ((c = *--p) == sepc || c == sepc2)
                continue;
            p++;
            if (*p != '\0') {
                /*
                 * The trailing white space was counted as a field.  If it
                 * was the only overflow, it belongs to the last stored
                 * field and is cut off -- unless we are only counting,
                 * in which case string must stay untouched.
                 */
                if (fn == nfields + 1 && !countonly)
                    *p = '\0';
                fn--;
            }
        }
        return (fn);
    }
    /* n separators */
    fn = 0;
    for (;;) {
        if (fn < nfields)
            *fp++ = p;
        fn++;
        /* scan to the end of the field */
        for (;;) {
            c = *p++;
            if (c == '\0')
                return (fn);
            sepp = sep;
            while ((sepc = *sepp++) != '\0' && sepc != c)
                continue;
            if (sepc != '\0')	/* it was a separator */
                break;
        }
        if (fn < nfields)
            *(p - 1) = '\0';
        /* skip the rest of the separator run */
        for (;;) {
            c = *p++;
            sepp = sep;
            while ((sepc = *sepp++) != '\0' && sepc != c)
                continue;
            if (sepc == '\0')	/* it wasn't a separator */
                break;
        }
        p--;
    }

    /* not reached */
}
182 
183 #ifdef TEST_SPLIT
/*
 * test program
 * pgm		runs regression
 * pgm sep	splits stdin lines by sep
 * pgm str sep	splits str by sep
 * pgm str sep n	splits str by sep n times
 * pgm str sep n x	only copies str n times, without splitting
 *			(x may be anything; presumably a baseline for timing)
 */
/* defined further down; declared here because main() calls them first */
int dosplit(char *string, char *seps);
int regress(void);

/*
 * main - driver for the split() test program
 *
 * The number of arguments selects the mode:
 *   none          run the built-in regression table
 *   sep           split each line of standard input by sep
 *   str sep       split str by sep and print the fields
 *   str sep n     split a fresh copy of str by sep, n times, silently
 *   str sep n x   only copy str n times, without splitting
 *
 * n is read with atoi(), so a non-numeric n means zero iterations.
 * Always exits with status 0, except when str does not fit the work
 * buffer in one of the repeating modes (status 1).
 */
int
main(int argc, char *argv[])
{
    char            buf[512];
    int             n;
#define	MNF	10
    char           *fields[MNF];

    /* both repeating modes copy argv[1] into buf: refuse what cannot fit */
    if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
        fprintf(stderr, "%s: string too long (max %d bytes)\n",
                argv[0], (int) sizeof(buf) - 1);
        exit(1);
    }

    if (argc > 4)
        for (n = atoi(argv[3]); n > 0; n--) {
            (void) strcpy(buf, argv[1]);
        }
    else if (argc > 3)
        for (n = atoi(argv[3]); n > 0; n--) {
            (void) strcpy(buf, argv[1]);
            (void) split(buf, fields, MNF, argv[2]);
        }
    else if (argc > 2)
        dosplit(argv[1], argv[2]);
    else if (argc > 1)
        while (fgets(buf, sizeof(buf), stdin) != NULL) {
            size_t          len = strlen(buf);

            /*
             * Stomp the newline, but only if there is one: the last line
             * of input, or a line longer than buf, arrives without it.
             */
            if (len > 0 && buf[len - 1] == '\n')
                buf[len - 1] = '\0';
            dosplit(buf, argv[1]);
        }
    else
        regress();

    return (0);
}
220 
/* defined further down; declared here because dosplit() calls it first */
int print(int nf, int nfp, char *fields[]);

/*
 * dosplit - split string by seps and print what split() produced
 *
 * At most NF field pointers are stored; string is modified in place by
 * split().  Always returns 0: the value carries no information, and the
 * int return type is kept so that callers compiled without a prototype
 * in scope still match.
 */
int
dosplit(char *string, char *seps)
{
#define	NF	5
    char           *fields[NF];
    int             nf;

    nf = split(string, fields, NF, seps);
    print(nf, NF, fields);
    return (0);
}
230 
/*
 * print - show the result of one split() call on standard output
 *
 * nf      field count returned by split()
 * nfp     number of entries that were available in fields[]
 * fields  the stored field pointers
 *
 * Writes the count, a tab, and the stored fields (at most nfp of them)
 * as a comma-separated list of quoted strings, always ending the line
 * with a newline -- including when nf is 0 or when nf exceeds nfp.
 * A NULL entry is shown as (NULL).  Always returns 0.
 */
int
print(int nf, int nfp, char *fields[])
{
    int             fn;
    int             bound;

    /* only the first nfp fields were actually stored */
    bound = (nf > nfp) ? nfp : nf;
    printf("%d:\t", nf);
    for (fn = 0; fn < bound; fn++)
        printf("%s\"%s\"", (fn > 0) ? ", " : "",
               (fields[fn] != NULL) ? fields[fn] : "(NULL)");
    printf("\n");
    return (0);
}
241 
/*
 * Regression table for split(), walked by regress().
 *
 *   str   string to split (regress() splits a copy of it)
 *   seps  separator argument handed to split()
 *   nf    expected return value of split()
 *   fi    expected contents of the stored fields; regress() compares
 *         the first nf of them, but never more than RNF
 *
 * regress() always offers split() RNF entries, so an entry with nf
 * greater than RNF expects the last stored field to be the unsplit
 * remainder of the string.  The table ends at the entry whose str is
 * NULL.
 */
#define	RNF	5		/* some table entries know this */
struct {
    char           *str;
    char           *seps;
    int             nf;
    char           *fi[RNF];
}               tests[] = {

    /* one separator character: every occurrence ends a field */
    "", " ", 0, {
	""
    },
    " ", " ", 2, {
	"", ""
    },
    "x", " ", 1, {
	"x"
    },
    "xy", " ", 1, {
	"xy"
    },
    "x y", " ", 2, {
	"x", "y"
    },
    "abc def  g ", " ", 5, {
	"abc", "def", "", "g", ""
    },
    "  a bcd", " ", 4, {
	"", "", "a", "bcd"
    },
    "a b c d e f", " ", 6, {
	"a", "b", "c", "d", "e f"
    },
    " a b c d ", " ", 6, {
	"", "a", "b", "c", "d "
    },

    /* two separator characters: a run of them separates fields */
    "", " _", 0, {
	""
    },
    " ", " _", 2, {
	"", ""
    },
    "x", " _", 1, {
	"x"
    },
    "x y", " _", 2, {
	"x", "y"
    },
    "ab _ cd", " _", 2, {
	"ab", "cd"
    },
    " a_b  c ", " _", 5, {
	"", "a", "b", "c", ""
    },
    "a b c_d e f", " _", 6, {
	"a", "b", "c", "d", "e f"
    },
    " a b c d ", " _", 6, {
	"", "a", "b", "c", "d "
    },

    /* three separator characters: the general n-separator code */
    "", " _~", 0, {
	""
    },
    " ", " _~", 2, {
	"", ""
    },
    "x", " _~", 1, {
	"x"
    },
    "x y", " _~", 2, {
	"x", "y"
    },
    "ab _~ cd", " _~", 2, {
	"ab", "cd"
    },
    " a_b  c~", " _~", 5, {
	"", "a", "b", "c", ""
    },
    "a b_c d~e f", " _~", 6, {
	"a", "b", "c", "d", "e f"
    },
    "~a b c d ", " _~", 6, {
	"", "a", "b", "c", "d "
    },

    /* four separator characters: the general n-separator code again */
    "", " _~-", 0, {
	""
    },
    " ", " _~-", 2, {
	"", ""
    },
    "x", " _~-", 1, {
	"x"
    },
    "x y", " _~-", 2, {
	"x", "y"
    },
    "ab _~- cd", " _~-", 2, {
	"ab", "cd"
    },
    " a_b  c~", " _~-", 5, {
	"", "a", "b", "c", ""
    },
    "a b_c-d~e f", " _~-", 6, {
	"a", "b", "c", "d", "e f"
    },
    "~a-b c d ", " _~-", 6, {
	"", "a", "b", "c", "d "
    },

    /*
     * two blanks: takes the two-separator code with both separators
     * equal, so runs of blanks collapse (compare the " " entries above)
     */
    "", "  ", 0, {
	""
    },
    " ", "  ", 2, {
	"", ""
    },
    "x", "  ", 1, {
	"x"
    },
    "xy", "  ", 1, {
	"xy"
    },
    "x y", "  ", 2, {
	"x", "y"
    },
    "abc def  g ", "  ", 4, {
	"abc", "def", "g", ""
    },
    "  a bcd", "  ", 3, {
	"", "a", "bcd"
    },
    "a b c d e f", "  ", 6, {
	"a", "b", "c", "d", "e f"
    },
    " a b c d ", "  ", 6, {
	"", "a", "b", "c", "d "
    },

    /*
     * empty separator: white space mode, blanks and tabs separate and
     * leading or trailing white space yields no empty field
     */
    "", "", 0, {
	""
    },
    " ", "", 0, {
	""
    },
    "x", "", 1, {
	"x"
    },
    "xy", "", 1, {
	"xy"
    },
    "x y", "", 2, {
	"x", "y"
    },
    "abc def  g ", "", 3, {
	"abc", "def", "g"
    },
    "\t a bcd", "", 2, {
	"a", "bcd"
    },
    "  a \tb\t c ", "", 3, {
	"a", "b", "c"
    },
    "a b c d e ", "", 5, {
	"a", "b", "c", "d", "e"
    },
    "a b\tc d e f", "", 6, {
	"a", "b", "c", "d", "e f"
    },
    " a b c d e f ", "", 6, {
	"a", "b", "c", "d", "e f "
    },

    /* end of table */
    NULL, NULL, 0, {
	NULL
    },
};
419 
regress(void)420 regress(void)
421 {
422     char            buf[512];
423     register int    n;
424     char           *fields[RNF + 1];
425     register int    nf;
426     register int    i;
427     register int    printit;
428     register char  *f;
429 
430     for (n = 0; tests[n].str != NULL; n++) {
431 	(void) strcpy(buf, tests[n].str);
432 	fields[RNF] = NULL;
433 	nf = split(buf, fields, RNF, tests[n].seps);
434 	printit = 0;
435 	if (nf != tests[n].nf) {
436 	    printf("split `%s' by `%s' gave %d fields, not %d\n",
437 		   tests[n].str, tests[n].seps, nf, tests[n].nf);
438 	    printit = 1;
439 	} else if (fields[RNF] != NULL) {
440 	    printf("split() went beyond array end\n");
441 	    printit = 1;
442 	} else {
443 	    for (i = 0; i < nf && i < RNF; i++) {
444 		f = fields[i];
445 		if (f == NULL)
446 		    f = "(NULL)";
447 		if (strcmp(f, tests[n].fi[i]) != 0) {
448 		    printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
449 			   tests[n].str, tests[n].seps,
450 			   i, fields[i], tests[n].fi[i]);
451 		    printit = 1;
452 		}
453 	    }
454 	}
455 	if (printit)
456 	    print(nf, RNF, fields);
457     }
458 }
459 
460 #endif
461