/* $OpenBSD: split.c,v 1.5 2007/09/09 23:25:12 chl Exp $ */
/* $NetBSD: split.c,v 1.2 1995/04/20 22:39:57 cgd Exp $ */

#include <stdio.h>
#include <string.h>

int split(char *string, char *fields[], int nfields, char *sep);

/*
 - split - divide a string into fields, like awk split()
 = int split(char *string, char *fields[], int nfields, char *sep);
 *
 * string is cut up in place: separators that end a stored field are
 * overwritten with '\0' and fields[] receives pointers into string.
 *
 * The separator set selects one of four behaviours:
 *   ""        white space mode: blanks and tabs separate, leading white
 *             space is skipped and trailing white space is not a field
 *   1 char    every occurrence separates, so adjacent separators yield
 *             empty fields
 *   2 chars   a run of separators counts as one
 *   3+ chars  same as 2 chars, using a general (slower) membership scan
 *
 * At most nfields pointers are stored.  If the string holds more fields
 * than that, the last slot keeps the whole unsplit remainder and the
 * return value exceeds nfields.  An empty string yields 0.
 *
 * If nfields <= 0 the fields vector is never touched (it may be NULL) and
 * the string is left unmodified; only the count is returned.
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;
	char **fp;
	char *sepp;
	int trimtrail;
	int countonly = 0;	/* caller supplied no room for pointers */
	char *scratch[1];	/* stand-in vector for the count-only case */

	/*
	 * The one- and two-separator loops below always store the first
	 * field before looking at the remaining room, so they need at
	 * least one slot.  With a one-slot vector none of them writes a
	 * '\0' into the string except the trailing white space trim,
	 * which is suppressed explicitly for countonly.
	 */
	if (nfields <= 0) {
		fields = scratch;
		nfields = 1;
		countonly = 1;
	}
	fp = fields;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		fn = nfields;	/* counts remaining slots downwards */
		for (;;) {
			*fp++ = p;
			fn--;
			if (fn == 0)
				break;	/* last slot keeps the remainder */
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(nfields - fn);
			*(p-1) = '\0';
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		fn = nfields;
		for (;;) {
			*fp++ = p;
			fn--;
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0') {
					/* trailing white space left an empty last field */
					if (trimtrail && **(fp-1) == '\0')
						fn++;
					return(nfields - fn);
				}
			if (fn == 0)
				break;
			*(p-1) = '\0';
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				/*
				 * The phantom field after the trailing white
				 * space was counted; drop it, and if that
				 * white space sits in the last stored field,
				 * cut it off.
				 */
				if (fn == nfields+1 && !countonly)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}

#ifdef TEST_SPLIT

#include <stdlib.h>

int dosplit(char *string, char *seps);
int print(int nf, int nfp, char *fields[]);
int regress(void);

/*
 * test program
 * pgm		runs regression
 * pgm sep	splits stdin lines by sep
 * pgm str sep	splits str by sep
 * pgm str sep n	splits str by sep n times
 *
 * Exit status is 0, or 1 if the regression reported any failure.
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
#	define	MNF	10
	char *fields[MNF];
	int status = 0;

	if (argc > 4)
		/*
		 * Copies without splitting.
		 * NOTE(review): presumably a baseline for timing the
		 * "n times" loop below -- confirm.
		 */
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strlcpy(buf, argv[1], sizeof buf);
		}
	else if (argc > 3)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strlcpy(buf, argv[1], sizeof buf);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		(void) dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			buf[strcspn(buf, "\n")] = '\0';	/* stomp newline */
			(void) dosplit(buf, argv[1]);
		}
	else
		status = (regress() != 0);

	exit(status);
}

/*
 * dosplit - split string by seps into at most NF fields and print them
 * Returns the field count reported by split().
 */
int
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *fields[NF];
	int nf;

	nf = split(string, fields, NF, seps);
	(void) print(nf, NF, fields);
	return(nf);
}

/*
 * print - show a split() result on stdout as  count:<TAB>"f1", "f2", ...
 * nf is the count split() returned, nfp the capacity of fields[]; only
 * the first min(nf, nfp) entries are printed.  The line is always
 * newline-terminated, including when nf is 0 or exceeds nfp.
 * Returns the number of fields printed.
 */
int
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("\"%s\"%s", fields[fn], (fn+1 < bound) ? ", " : "");
	printf("\n");
	return(bound);
}

#define	RNF	5		/* some table entries know this */
/*
 * Regression table: str split by seps must give nf fields, the first
 * min(nf, RNF) of which must equal fi[].  Blanks inside the strings are
 * significant; a doubled blank is two separator characters.
 */
struct {
	char *str;
	char *seps;
	int nf;
	char *fi[RNF];
} tests[] = {
	/* one separator: adjacent separators give empty fields */
	{ "",		" ",	0,	{ "" } },
	{ " ",		" ",	2,	{ "", "" } },
	{ "x",		" ",	1,	{ "x" } },
	{ "xy",		" ",	1,	{ "xy" } },
	{ "x y",	" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",	" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two separators: runs collapse */
	{ "",		" _",	0,	{ "" } },
	{ " ",		" _",	2,	{ "", "" } },
	{ "x",		" _",	1,	{ "x" } },
	{ "x y",	" _",	2,	{ "x", "y" } },
	{ "ab _ cd",	" _",	2,	{ "ab", "cd" } },
	{ " a_b c ",	" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separators: general path */
	{ "",		" _~",	0,	{ "" } },
	{ " ",		" _~",	2,	{ "", "" } },
	{ "x",		" _~",	1,	{ "x" } },
	{ "x y",	" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",	" _~",	2,	{ "ab", "cd" } },
	{ " a_b c~",	" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",	" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separators: general path */
	{ "",		" _~-",	0,	{ "" } },
	{ " ",		" _~-",	2,	{ "", "" } },
	{ "x",		" _~-",	1,	{ "x" } },
	{ "x y",	" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",	" _~-",	2,	{ "ab", "cd" } },
	{ " a_b c~",	" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",	" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/* two identical separators: same inputs as the first group, but runs collapse */
	{ "",		"  ",	0,	{ "" } },
	{ " ",		"  ",	2,	{ "", "" } },
	{ "x",		"  ",	1,	{ "x" } },
	{ "xy",		"  ",	1,	{ "xy" } },
	{ "x y",	"  ",	2,	{ "x", "y" } },
	{ "abc def  g ",	"  ",	4,	{ "abc", "def", "g", "" } },
	{ "  a bcd",	"  ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	"  ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	"  ",	6,	{ "", "a", "b", "c", "d " } },

	/* empty separator set: white space mode */
	{ "",		"",	0,	{ "" } },
	{ " ",		"",	0,	{ "" } },
	{ "x",		"",	1,	{ "x" } },
	{ "xy",		"",	1,	{ "xy" } },
	{ "x y",	"",	2,	{ "x", "y" } },
	{ "abc def g ",	"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",	"",	2,	{ "a", "bcd" } },
	{ " a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",	"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	{ NULL,		NULL,	0,	{ NULL } },
};

/*
 * regress - run split() over every entry of tests[]
 * For each entry the field count, the slot just past the vector (which
 * must stay untouched) and the stored fields are checked; any mismatch is
 * reported on stdout followed by the actual result.
 * Returns the number of table entries that failed.
 */
int
regress(void)
{
	char buf[512];
	int n;
	char *fields[RNF+1];
	int nf;
	int i;
	int printit;
	char *f;
	int failures = 0;

	for (n = 0; tests[n].str != NULL; n++) {
		(void) strlcpy(buf, tests[n].str, sizeof buf);
		fields[RNF] = NULL;	/* sentinel to detect an overrun */
		nf = split(buf, fields, RNF, tests[n].seps);
		printit = 0;
		if (nf != tests[n].nf) {
			printf("split `%s' by `%s' gave %d fields, not %d\n",
				tests[n].str, tests[n].seps, nf, tests[n].nf);
			printit = 1;
		} else if (fields[RNF] != NULL) {
			printf("split() went beyond array end\n");
			printit = 1;
		} else {
			for (i = 0; i < nf && i < RNF; i++) {
				f = fields[i];
				if (f == NULL)
					f = "(NULL)";
				if (strcmp(f, tests[n].fi[i]) != 0) {
					/* print f, not fields[i], which may be NULL */
					printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
						tests[n].str, tests[n].seps,
						i, f, tests[n].fi[i]);
					printit = 1;
				}
			}
		}
		if (printit) {
			failures++;
			(void) print(nf, RNF, fields);
		}
	}
	return(failures);
}
#endif