1 /*******************WARNING*********************
2
3 This is a *MODIFIED* version of Geoff Collyer's proof-of-concept NOV
4 implementation.
5
6 It has been modified to support threading directly from a file handle
7 to a NNTP server without a temporary file.
8
9 This is not a complete distribution. We have only distributed enough
10 to support NN's needs.
11
12 The original version came from world.std.com:/src/news/nov.dist.tar.Z
13 and was dated 11 Aug 1993.
14
15 In any case, bugs you find here are probably my fault, as I've trimmed
16 a fair bit of unused code.
17
18 -Peter Wemm <peter@DIALix.oz.au>
19 */
20
21 /*
22 * Copyright (c) Geoffrey Collyer 1992, 1993.
23 * All rights reserved.
24 * Written by Geoffrey Collyer.
25 * Thanks to UUNET Communications Services Inc for financial support.
26 *
27 * This software is not subject to any license of the American Telephone
28 * and Telegraph Company, the Regents of the University of California, or
29 * the Free Software Foundation.
30 *
31 * Permission is granted to anyone to use this software for any purpose on
32 * any computer system, and to alter it and redistribute it freely, subject
33 * to the following restrictions:
34 *
35 * 1. The authors are not responsible for the consequences of use of this
36 * software, no matter how awful, even if they arise from flaws in it.
37 *
38 * 2. The origin of this software must not be misrepresented, either by
39 * explicit claim or by omission. Since few users ever read sources,
40 * credits must appear in the documentation.
41 *
42 * 3. Altered versions must be plainly marked as such, and must not be
43 * misrepresented as being the original software. Since few users
44 * ever read sources, credits must appear in the documentation.
45 *
46 * 4. This notice may not be removed or altered.
47 */
48
49 #include "config.h"
50
/*
 * split - divide a string into fields, like awk split()
 *
 * string   text to split.  It is cut up in place: the separator ending a
 *          stored field is overwritten with '\0', except for the field in
 *          the last slot of fields[], which keeps the unsplit rest.
 * fields   receives pointers into string; the list is not NULL-terminated.
 * nfields  number of entries available in fields[].  When it is zero or
 *          negative nothing is stored, string is left untouched and the
 *          fields are only counted.
 * sep      ""    fields are separated by runs of blanks and tabs; leading
 *                white space is skipped and trailing white space does not
 *                produce an empty field
 *          "c"   every occurrence of c separates two fields, so adjacent
 *                separators yield empty fields
 *          "ab"  (two or more characters) any run of these characters
 *                separates two fields
 *
 * Returns the number of fields found, including those that did not fit
 * into fields[].
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;
	char **fp = fields;
	char *sepp;
	int trimtrail;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else {
		trimtrail = 0;
	}
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return (0);

	/* single separator */
	if (sepc2 == '\0') {
		if (nfields > 0) {
			fn = nfields;	/* counts the free slots down */
			for (;;) {
				*fp++ = p;
				fn--;
				if (fn == 0)
					break;
				while ((c = *p++) != sepc)
					if (c == '\0')
						return (nfields - fn);
				*(p - 1) = '\0';
			}
			/* we have overflowed the fields vector -- just count them */
			fn = nfields;
		} else {
			/* no slot at all: the field starting at p is number one */
			fn = 1;
		}
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return (fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		if (nfields > 0) {
			fn = nfields;	/* counts the free slots down */
			for (;;) {
				*fp++ = p;
				fn--;
				while ((c = *p++) != sepc && c != sepc2)
					if (c == '\0') {
						/* an empty last field is trailing white space */
						if (trimtrail && **(fp - 1) == '\0')
							fn++;
						return (nfields - fn);
					}
				if (fn == 0)
					break;
				*(p - 1) = '\0';
				while ((c = *p++) == sepc || c == sepc2)
					continue;
				p--;
			}
			/* we have overflowed the fields vector -- just count them */
			fn = nfields;
		} else {
			/* no slot at all: step over the first field unstored */
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0')
					return (1);
			fn = 1;
		}
		/* here c is the separator that ended the last field seen */
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;		/* back onto the terminating '\0' */
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;		/* first character after the last word */
			if (*p != '\0') {
				/*
				 * The final "field" counted was only trailing white
				 * space.  If it was the sole overflow, the last slot
				 * holds one word plus that white space: cut it off.
				 */
				if (nfields > 0 && fn == nfields + 1)
					*p = '\0';
				fn--;
			}
		}
		return (fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		/* scan to the end of this field */
		for (;;) {
			c = *p++;
			if (c == '\0')
				return (fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p - 1) = '\0';
		/* skip the rest of this run of separators */
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}
182
183 #ifdef TEST_SPLIT
184 /*
185 * test program
186 * pgm runs regression
187 * pgm sep splits stdin lines by sep
188 * pgm str sep splits str by sep
189 * pgm str sep n splits str by sep n times
190 */
int dosplit(char *string, char *seps);
int regress(void);

/*
 * main - test driver for split()
 *	pgm		runs regression
 *	pgm sep		splits stdin lines by sep
 *	pgm str sep	splits str by sep
 *	pgm str sep n	splits str by sep n times
 *	pgm str sep n x	only copies str n times, without splitting
 *			(presumably a baseline for timing the line above)
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
	int len;
#define MNF 10
	char *fields[MNF];

	/* the repeat modes copy argv[1] into buf: refuse what cannot fit */
	if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
		printf("string too long (limit is %d characters)\n",
		    (int) sizeof(buf) - 1);
		exit(1);
	}

	if (argc > 4) {
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
		}
	} else if (argc > 3) {
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	} else if (argc > 2) {
		dosplit(argv[1], argv[2]);
	} else if (argc > 1) {
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/*
			 * Stomp the newline, but only if there is one: an
			 * over-long line or a final line without a newline
			 * must keep its last character, and an empty read
			 * must not index buf[-1].
			 */
			len = (int) strlen(buf);
			if (len > 0 && buf[len - 1] == '\n')
				buf[len - 1] = '\0';
			dosplit(buf, argv[1]);
		}
	} else {
		regress();
	}

	exit(0);
}
220
int print(int nf, int nfp, char *fields[]);

/*
 * dosplit - split string by seps into at most NF stored fields and print
 * the result.  The string is modified in place by split().
 * Always returns 0; the int return type matches the historical
 * implicit-int definition, so existing callers keep working.
 */
int
dosplit(char *string, char *seps)
{
#define NF 5
	char *fields[NF];
	int nf;

	nf = split(string, fields, NF, seps);
	print(nf, NF, fields);
	return (0);
}
230
/*
 * print - show the outcome of a split() on stdout, on one line
 *
 * nf      field count returned by split()
 * nfp     number of slots that were available in fields[]
 * fields  the stored fields; only the first min(nf, nfp) are valid
 *
 * Output is the count, a tab, then the stored fields in double quotes
 * separated by ", ".  When more fields were found than stored, the list
 * ends with a dangling ", " to show that it is incomplete.
 * Always returns 0; the int return type matches the historical
 * implicit-int definition.
 */
int
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("\"%s\"%s", fields[fn], (fn + 1 < nf) ? ", " : "\n");
	/*
	 * The loop ends the line only when it printed the very last field.
	 * With no field printed, or with overflow, finish the line here so
	 * the next output does not run on.
	 */
	if (bound < 1 || bound < nf)
		printf("\n");
	return (0);
}
241
#define RNF 5			/* some table entries know this */
/*
 * Regression vectors for split(), ended by an entry whose str is NULL.
 * Each vector is split with room for RNF fields; nf is the expected
 * return value and fi[] lists the expected contents of the stored fields
 * (at most RNF, even when nf is larger).
 */
struct {
	char *str;		/* string to split */
	char *seps;		/* separator set handed to split() */
	int nf;			/* expected number of fields */
	char *fi[RNF];		/* expected stored fields */
} tests[] = {

	/* one separator: every blank delimits, so doubled blanks give "" */
	{ "", " ", 0, { "" } },
	{ " ", " ", 2, { "", "" } },
	{ "x", " ", 1, { "x" } },
	{ "xy", " ", 1, { "xy" } },
	{ "x y", " ", 2, { "x", "y" } },
	{ "abc def  g ", " ", 5, { "abc", "def", "", "g", "" } },
	{ "  a bcd", " ", 4, { "", "", "a", "bcd" } },
	{ "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" } },
	{ " a b c d ", " ", 6, { "", "a", "b", "c", "d " } },

	/* two separators */
	{ "", " _", 0, { "" } },
	{ " ", " _", 2, { "", "" } },
	{ "x", " _", 1, { "x" } },
	{ "x y", " _", 2, { "x", "y" } },
	{ "ab _ cd", " _", 2, { "ab", "cd" } },
	{ " a_b c ", " _", 5, { "", "a", "b", "c", "" } },
	{ "a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" } },
	{ " a b c d ", " _", 6, { "", "a", "b", "c", "d " } },

	/* three separators */
	{ "", " _~", 0, { "" } },
	{ " ", " _~", 2, { "", "" } },
	{ "x", " _~", 1, { "x" } },
	{ "x y", " _~", 2, { "x", "y" } },
	{ "ab _~ cd", " _~", 2, { "ab", "cd" } },
	{ " a_b c~", " _~", 5, { "", "a", "b", "c", "" } },
	{ "a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" } },
	{ "~a b c d ", " _~", 6, { "", "a", "b", "c", "d " } },

	/* four separators */
	{ "", " _~-", 0, { "" } },
	{ " ", " _~-", 2, { "", "" } },
	{ "x", " _~-", 1, { "x" } },
	{ "x y", " _~-", 2, { "x", "y" } },
	{ "ab _~- cd", " _~-", 2, { "ab", "cd" } },
	{ " a_b c~", " _~-", 5, { "", "a", "b", "c", "" } },
	{ "a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " } },

	/*
	 * NOTE(review): this group repeats the separator set of the first
	 * group; it may originally have used a different set (e.g. a
	 * doubled blank) -- confirm against the upstream distribution.
	 */
	{ "", " ", 0, { "" } },
	{ " ", " ", 2, { "", "" } },
	{ "x", " ", 1, { "x" } },
	{ "xy", " ", 1, { "xy" } },
	{ "x y", " ", 2, { "x", "y" } },
	{ "abc def g ", " ", 4, { "abc", "def", "g", "" } },
	{ " a bcd", " ", 3, { "", "a", "bcd" } },
	{ "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" } },
	{ " a b c d ", " ", 6, { "", "a", "b", "c", "d " } },

	/* white space: blanks and tabs, leading and trailing ones ignored */
	{ "", "", 0, { "" } },
	{ " ", "", 0, { "" } },
	{ "x", "", 1, { "x" } },
	{ "xy", "", 1, { "xy" } },
	{ "x y", "", 2, { "x", "y" } },
	{ "abc def g ", "", 3, { "abc", "def", "g" } },
	{ "\t a bcd", "", 2, { "a", "bcd" } },
	{ " a \tb\t c ", "", 3, { "a", "b", "c" } },
	{ "a b c d e ", "", 5, { "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " } },

	/* end marker */
	{ NULL, NULL, 0, { NULL } },
};
419
regress(void)420 regress(void)
421 {
422 char buf[512];
423 register int n;
424 char *fields[RNF + 1];
425 register int nf;
426 register int i;
427 register int printit;
428 register char *f;
429
430 for (n = 0; tests[n].str != NULL; n++) {
431 (void) strcpy(buf, tests[n].str);
432 fields[RNF] = NULL;
433 nf = split(buf, fields, RNF, tests[n].seps);
434 printit = 0;
435 if (nf != tests[n].nf) {
436 printf("split `%s' by `%s' gave %d fields, not %d\n",
437 tests[n].str, tests[n].seps, nf, tests[n].nf);
438 printit = 1;
439 } else if (fields[RNF] != NULL) {
440 printf("split() went beyond array end\n");
441 printit = 1;
442 } else {
443 for (i = 0; i < nf && i < RNF; i++) {
444 f = fields[i];
445 if (f == NULL)
446 f = "(NULL)";
447 if (strcmp(f, tests[n].fi[i]) != 0) {
448 printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
449 tests[n].str, tests[n].seps,
450 i, fields[i], tests[n].fi[i]);
451 printit = 1;
452 }
453 }
454 }
455 if (printit)
456 print(nf, RNF, fields);
457 }
458 }
459
460 #endif
461