1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 #include <regexp.h>
6 
/* decimal digit characters; not referenced by the code visible in this file */
char	digit[] = "0123456789";
/* text appended to file names taken from a matched subexpression (-s) */
char	*suffix = "";
/* prefix of the sequentially named output files (-f) */
char	*stem = "x";
/* two-letter sequence suffix for the next sequential file; advanced by nextfile */
char	suff[] = "aa";
/* name of the output file that openf creates */
char	name[200];
/* buffered writer for the current output file, (re)initialized by openf */
Biobuf	bout;
Biobuf	*output = &bout;
14 
15 extern int nextfile(void);
16 extern int matchfile(Resub*);
17 extern void openf(void);
18 extern char *fold(char*,int);
19 extern void usage(void);
20 extern void badexp(void);
21 
22 void
main(int argc,char * argv[])23 main(int argc, char *argv[])
24 {
25 	Reprog *exp;
26 	char *pattern = 0;
27 	int n = 1000;
28 	char *line;
29 	int xflag = 0;
30 	int iflag = 0;
31 	Biobuf bin;
32 	Biobuf *b = &bin;
33 	char buf[256];
34 
35 	ARGBEGIN {
36 	case 'l':
37 	case 'n':
38 		n=atoi(EARGF(usage()));
39 		break;
40 	case 'e':
41 		pattern = strdup(EARGF(usage()));
42 		break;
43 	case 'f':
44 		stem = strdup(EARGF(usage()));
45 		break;
46 	case 's':
47 		suffix = strdup(EARGF(usage()));
48 		break;
49 	case 'x':
50 		xflag++;
51 		break;
52 	case 'i':
53 		iflag++;
54 		break;
55 	default:
56 		usage();
57 		break;
58 
59 	} ARGEND;
60 
61 	if(argc < 0 || argc > 1)
62 		usage();
63 
64 	if(argc != 0) {
65 		b = Bopen(argv[0], OREAD);
66 		if(b == nil) {
67 			fprint(2, "split: can't open %s: %r\n", argv[0]);
68 			exits("open");
69 		}
70 	} else
71 		Binit(b, 0, OREAD);
72 
73 	if(pattern) {
74 		if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern)))
75 			badexp();
76 		while((line=Brdline(b,'\n')) != 0) {
77 			Resub match[2];
78 			memset(match, 0, sizeof match);
79 			line[Blinelen(b)-1] = 0;
80 			if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) {
81 				if(matchfile(match) && xflag)
82 					continue;
83 			} else if(output == 0)
84 				nextfile();	/* at most once */
85 			Bwrite(output, line, Blinelen(b)-1);
86 			Bputc(output, '\n');
87 		}
88 	} else {
89 		int linecnt = n;
90 
91 		while((line=Brdline(b,'\n')) != 0) {
92 			if(++linecnt > n) {
93 				nextfile();
94 				linecnt = 1;
95 			}
96 			Bwrite(output, line, Blinelen(b));
97 		}
98 
99 		/*
100 		 * in case we didn't end with a newline, tack whatever's
101 		 * left onto the last file
102 		 */
103 		while((n = Bread(b, buf, sizeof(buf))) > 0)
104 			Bwrite(output, buf, n);
105 	}
106 	if(b != nil)
107 		Bterm(b);
108 	exits(0);
109 }
110 
111 int
nextfile(void)112 nextfile(void)
113 {
114 	static int canopen = 1;
115 	if(suff[0] > 'z') {
116 		if(canopen)
117 			fprint(2, "split: file %szz not split\n",stem);
118 		canopen = 0;
119 	} else {
120 		strcpy(name, stem);
121 		strcat(name, suff);
122 		if(++suff[1] > 'z')
123 			suff[1] = 'a', ++suff[0];
124 		openf();
125 	}
126 	return canopen;
127 }
128 
129 int
matchfile(Resub * match)130 matchfile(Resub *match)
131 {
132 	if(match[1].s.sp) {
133 		int len = match[1].e.ep - match[1].s.sp;
134 		strncpy(name, match[1].s.sp, len);
135 		strcpy(name+len, suffix);
136 		openf();
137 		return 1;
138 	}
139 	return nextfile();
140 }
141 
142 void
openf(void)143 openf(void)
144 {
145 	static int fd = 0;
146 	Bflush(output);
147 	Bterm(output);
148 	if(fd > 0)
149 		close(fd);
150 	fd = create(name,OWRITE,0666);
151 	if(fd < 0) {
152 		fprint(2, "grep: can't create %s: %r\n", name);
153 		exits("create");
154 	}
155 	Binit(output, fd, OWRITE);
156 }
157 
/*
 * Return a lower-case copy of s.
 *
 * At most n bytes are copied (fewer if s holds a NUL earlier) and the
 * copy is always NUL-terminated.  The result lives in a static buffer
 * that is reused, and grown as needed, by every call, so it is only
 * valid until the next call.  s itself is not modified.
 */
char *
fold(char *s, int n)
{
	static char *fline;
	static int linesize = 0;
	char *grown;
	int i;

	if(n < 0)
		n = 0;
	if(linesize < n+1){
		grown = realloc(fline, n+1);
		/* callers use the result unchecked, so stop here rather than hand back a null pointer */
		if(grown == 0)
			abort();
		fline = grown;
		linesize = n+1;
	}
	/*
	 * we assume the 'A'-'Z' only appear as themselves
	 * in a utf encoding.
	 */
	for(i = 0; i < n && s[i] != 0; i++)
		fline[i] = tolower((unsigned char)s[i]);
	fline[i] = 0;
	return fline;
}
176 
/*
 * Print the command synopsis on file descriptor 2 and exit with
 * status "usage".
 */
void
usage(void)
{
	static char synopsis[] =
		"usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n";

	fprint(2, "%s", synopsis);
	exits("usage");
}
183 
/*
 * Report that the pattern given with -e could not be compiled and
 * exit; the exit status is the same text as the diagnostic.
 */
void
badexp(void)
{
	char *why = "bad regular expression";

	fprint(2, "split: %s\n", why);
	exits(why);
}
190