1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1995-2012 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *               Glenn Fowler <glenn.s.fowler@gmail.com>                *
18 *                                                                      *
19 ***********************************************************************/
20 #pragma prototyped
21 
22 #include "sed.h"
23 
24 static const char usage[] =
25 "[-?\n@(#)$Id: sed (AT&T Research) 2012-03-28 $\n]"
26 USAGE_LICENSE
27 "[+NAME?sed - stream editor]"
28 "[+DESCRIPTION?\bsed\b is a stream editor that reads one or more text files,"
29 "	makes editing changes according to a script of editing commands,"
30 "	and writes the results to standard output. The script is obtained"
31 "	from either the script operand string or a combination of the"
32 "	option-arguments from the \b--expression\b and \b--file\b options.]"
33 
34 "[b:strip-blanks?Strip leading blanks from \ba\b, \bc\b, and \bi\b text.]"
35 "[e:expression?Append the editing commands in \ascript\a to the end of the"
36 "	the editing command script. \ascript\a may contain more than one"
37 "	newline separated command.]:[script]"
38 "[f:file?Append the editing commands in \ascript-file\a to the end of the"
39 "	the editing command script.]:[script-file]"
40 "[n:quiet|silent?Suppress the default output in which each line, after it is"
41 "	examined for editing, is written to standard output. Only lines"
42 "	explicitly selected for output will be written.]"
43 "[A|X:augmented?Enable augmented regular expressions; this includes negation"
44 "	and conjunction.]"
45 "[E|r:extended|regexp-extended?Enable extended regular expressions, i.e.,"
46 "	\begrep\b(1) style.]"
47 "[O:lenient?Enable lenient regular expression interpretation."
48 "	This is the default if \bgetconf CONFORMANCE\b is not \bstandard\b.]"
49 "[S:strict|posix?Enable strict regular expression interpretation. This is the"
50 "	default if \bgetconf CONFORMANCE\b is \bstandard\b. You'd be"
51 "	suprised what the lenient mode lets by.]"
52 "[m?multi-digit-reference?Enable \a\\dd\a multi-digit backreferences.]"
53 "[d?Ignored by this implementation.]"
54 "[u:unbuffered?Unbuffered output.]"
55 
56 "\n"
57 "\n[ file ... ]\n"
58 "\n"
59 
60 "[+SEE ALSO?\bawk\b(1), \bed\b(1), \bgrep\b(1), \bregex\b(3)]"
61 ;
62 
#if 0
/*
 * Disabled forward declarations, kept for reference only: in the code
 * visible here each function is defined before its first use.
 * The signatures mirror the definitions so that enabling this block
 * does not produce conflicting-type errors.
 */
static void	readscript(Text*, char*);
static void	copyscript(Text*, const unsigned char*);
static int	initinput(int, char **);
static Sfio_t*	aopen(char*, int);
#endif
69 
/*
 * strncmp() for strings that may be held as unsigned char.
 * The casts keep const so literals and const buffers can be passed
 * without discarding qualifiers; the length argument is parenthesized.
 */
#define ustrncmp(a,b,c) strncmp((const char*)(a), (const char*)(b), (c))
71 
/* File-scope option and run-state flags. */

/* flags handed to regcomp() */
int reflags = 0;
/* number of the record currently being processed */
int recno = 0;
/* set when default output is suppressed (nonprint option) */
int nflag = 0;
/* set once the q command has been executed */
int qflag = 0;
/* set when a substitution has occurred */
int sflag = 0;
/* strip leading blanks from the <text> of c, a, i */
int bflag = 0;
/* output is to be unbuffered */
int uflag = 0;

/* CC_NATIVE => CC_ASCII character map */
unsigned char*	map;
81 
82 void
grow(Text * t,word n)83 grow(Text *t, word n)
84 {
85 	word w = t->w - t->s;
86 	word e = t->e - t->s + (n/SF_BUFSIZE+1)*SF_BUFSIZE;
87 	t->s = oldof(t->s, unsigned char, e, 0);
88 	if(t->s == 0)
89 		error(3, "out of space");
90 	t->w = t->s + w;
91 	t->e = t->s + e;
92 }
93 
94 /* BUG: a segment that ends with a comment whose
95    last character is \ causes a diagnostic */
96 
97 void
safescript(Text * t)98 safescript(Text *t)
99 {
100 	if(t->w > t->s+1 && t->w[-2] == '\\')
101 		error(1, "script segment ends with \\");
102 }
103 
104 static Sfio_t *
aopen(char * s,int level)105 aopen(char *s, int level)
106 {
107 	Sfio_t *f = sfopen(NiL, s, "r");
108 	if(f == 0)
109 		error(ERROR_SYSTEM|level, "%s: cannot open", s);
110 	if (uflag)
111 		sfsetbuf(f, 0, 0);
112 	return f;
113 }
114 
115 static void
readscript(Text * t,char * s)116 readscript(Text *t, char *s)
117 {
118 	word n;
119 	Sfio_t *f = aopen(s, 3);
120 	for(;;) {
121 		assure(t, 4);
122 		n = sfread(f, t->w, t->e - t->w - 3);
123 		if(n <= 0)
124 			break;
125 		t->w += n;
126 	}
127 	sfclose(f);
128 	if(t->w > t->s && t->w[-1] != '\n') {
129 		*t->w++ = '\n';
130 		error(1, "newline appended to script segment");
131 	}
132 	*t->w = 0;
133 	safescript(t);
134 }
135 
136 static void
copyscript(Text * t,const unsigned char * s)137 copyscript(Text *t, const unsigned char *s)
138 {
139 	do {
140 		assure(t, 2);
141 	} while(*t->w++ = *s++);
142 	if(--t->w > t->s && t->w[-1] != '\n') {
143 		*t->w++ = '\n';
144 		*t->w = 0;
145 	}
146 	safescript(t);
147 }
148 
149 /* DATA INPUT */
150 
151 struct {
152 	int iargc;		/* # of files not fully read */
153 	char **iargv;		/* current file */
154 	Sfio_t *ifile;		/* current input file */
155 } input;
156 
157 int
readline(Text * t)158 readline(Text *t)
159 {
160 	char*	s;
161 	word	c;
162 
163 	coda();
164 	if (qflag || input.iargc <= 0)
165 		return 0;
166 	for (;;)
167 	{
168 		if (s = sfgetr(input.ifile, '\n', 1))
169 		{
170 			c = sfvalue(input.ifile);
171 			break;
172 		}
173 		if (s = sfgetr(input.ifile, '\n', -1))
174 		{
175 			c = sfvalue(input.ifile) + 1;
176 			error(1, "newline appended");
177 			break;
178 		}
179 		error_info.file = 0;
180 		error_info.line = 0;
181 		sfclose(input.ifile);
182 		do
183 		{
184 			if (--input.iargc <= 0)
185 				return 0;
186 		} while (!(input.ifile = aopen(*++input.iargv, 2)));
187 		error_info.file = *input.iargv;
188 	}
189 	assure(t, c);
190 	memcpy(t->w, s, c);
191 	t->w += c - 1;
192 	error_info.line++;
193 	recno++;
194 	sflag = 0;
195 	return 1;
196 }
197 
198 int
ateof(void)199 ateof(void)
200 {
201 	int	c;
202 
203 	if (input.iargc == 1)
204 	{
205 		if ((c = sfgetc(input.ifile)) != EOF)
206 			sfungetc(input.ifile, c);
207 		else
208 			input.iargc = 0;
209 	}
210 	return input.iargc <= 0;
211 }
212 
213 static int
initinput(int argc,char ** argv)214 initinput(int argc, char **argv)
215 {
216 	input.iargc = argc;
217 	input.iargv = argv;
218 	if(input.iargc == 0) {
219 		input.iargc = 1;	/* for ateof() */
220 		input.ifile = sfstdin;
221 	} else {
222 		while (!(input.ifile = aopen(*input.iargv, 2))) {
223 			if (--input.iargc <= 0)
224 				return 0;
225 			++input.iargv;
226 		}
227 		error_info.file = *input.iargv;
228 	}
229 	return 1;
230 }
231 
232 #if DEBUG & 1
233 
234 /* debugging code 1; compile and execute stubs.
235    simply prints the already collected script and
236    prints numbered input lines */
237 
238 void
compile(Text * script,Text * t)239 compile(Text *script, Text *t)
240 {
241 	unsigned char *s = t->s;
242 	assure(script, 1);
243 	*script->w++ = 0;
244 	while(*s) sfputc(sfstdout, *s++);
245 }
246 
247 void
execute(Text * x,Text * y)248 execute(Text *x, Text *y)
249 {
250 	x = x;
251 	sfprintf(sfstdout, "%d: %s", recno, y->s);
252 }
253 
254 #endif
255 
256 int
main(int argc,char ** argv)257 main(int argc, char **argv)
258 {
259 	int c;
260 	static Text script;
261 	static Text data;
262 	error_info.id = "sed";
263 	if (!conformance(0, 0))
264 		reflags = REG_LENIENT;
265 	map = ccmap(CC_NATIVE, CC_ASCII);
266 	while (c = optget(argv, usage))
267 		switch (c)
268 		{
269 		case 'A':
270 		case 'X':
271 			reflags |= REG_AUGMENTED;
272 			break;
273 		case 'E':
274 		case 'r':
275 			reflags |= REG_EXTENDED;
276 			break;
277 		case 'O':
278 			reflags |= REG_LENIENT;
279 			break;
280 		case 'S':
281 			reflags &= ~REG_LENIENT;
282 			break;
283 		case 'b':
284 			bflag++;
285 			break;
286 		case 'e':
287 			copyscript(&data, (unsigned char*)opt_info.arg);
288 			break;
289 		case 'f':
290 			readscript(&data, opt_info.arg);
291 			break;
292 		case 'm':
293 			reflags |= REG_MULTIREF;
294 			break;
295 		case 'n':
296 			nflag++;
297 			break;
298 		case 'd':
299 			break;
300 		case 'u':
301 			uflag++;
302 			break;
303 		case '?':
304 			error(ERROR_USAGE|4, "%s", opt_info.arg);
305 			break;
306 		case ':':
307 			error(2, "%s", opt_info.arg);
308 			break;
309 		}
310 	if (error_info.errors)
311 		error(ERROR_USAGE|4, "%s", optusage(NiL));
312 	argv += opt_info.index;
313 	argc -= opt_info.index;
314 	if(data.s == 0) {
315 		if(!*argv)
316 			error(3, "no script");
317 		copyscript(&data, (unsigned char*)*argv++);
318 		argc--;
319 	}
320 	if(ustrncmp(data.s, "#n", 2) == 0)
321 		nflag = 1;
322 	copyscript(&data, (const unsigned char*)"\n\n");  /* e.g. s/a/\ */
323 	compile(&script, &data);
324 #if DEBUG
325 	printscript(&script);
326 #endif
327 	if (uflag)
328 		sfsetbuf(sfstdout, 0, 0);
329 	if (initinput(argc, argv))
330 		for(;;) {
331 			data.w = data.s;
332 			if(!readline(&data))
333 				break;
334 			execute(&script, &data);
335 		}
336 	if(sfclose(sfstdout) < 0)
337 		error(ERROR_SYSTEM|3, stdouterr);
338 	return error_info.errors != 0;
339 }
340