1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1995-2012 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Eclipse Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.eclipse.org/org/documents/epl-v10.html *
11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <glenn.s.fowler@gmail.com> *
18 * *
19 ***********************************************************************/
20 #pragma prototyped
21
22 #include "sed.h"
23
24 static const char usage[] =
25 "[-?\n@(#)$Id: sed (AT&T Research) 2012-03-28 $\n]"
26 USAGE_LICENSE
27 "[+NAME?sed - stream editor]"
28 "[+DESCRIPTION?\bsed\b is a stream editor that reads one or more text files,"
29 " makes editing changes according to a script of editing commands,"
30 " and writes the results to standard output. The script is obtained"
31 " from either the script operand string or a combination of the"
32 " option-arguments from the \b--expression\b and \b--file\b options.]"
33
34 "[b:strip-blanks?Strip leading blanks from \ba\b, \bc\b, and \bi\b text.]"
35 "[e:expression?Append the editing commands in \ascript\a to the end of the"
36 " the editing command script. \ascript\a may contain more than one"
37 " newline separated command.]:[script]"
38 "[f:file?Append the editing commands in \ascript-file\a to the end of the"
39 " the editing command script.]:[script-file]"
40 "[n:quiet|silent?Suppress the default output in which each line, after it is"
41 " examined for editing, is written to standard output. Only lines"
42 " explicitly selected for output will be written.]"
43 "[A|X:augmented?Enable augmented regular expressions; this includes negation"
44 " and conjunction.]"
45 "[E|r:extended|regexp-extended?Enable extended regular expressions, i.e.,"
46 " \begrep\b(1) style.]"
47 "[O:lenient?Enable lenient regular expression interpretation."
48 " This is the default if \bgetconf CONFORMANCE\b is not \bstandard\b.]"
49 "[S:strict|posix?Enable strict regular expression interpretation. This is the"
50 " default if \bgetconf CONFORMANCE\b is \bstandard\b. You'd be"
51 " suprised what the lenient mode lets by.]"
52 "[m?multi-digit-reference?Enable \a\\dd\a multi-digit backreferences.]"
53 "[d?Ignored by this implementation.]"
54 "[u:unbuffered?Unbuffered output.]"
55
56 "\n"
57 "\n[ file ... ]\n"
58 "\n"
59
60 "[+SEE ALSO?\bawk\b(1), \bed\b(1), \bgrep\b(1), \bregex\b(3)]"
61 ;
62
/* Disabled forward declarations; each function is defined before its first use. */
#if 0
static void	readscript(Text*, char*);
static void	copyscript(Text*, unsigned char*);
static int	initinput(int, char **);
static Sfio_t*	aopen(char*, int);
#endif

/* strncmp() for operands that may be unsigned char strings */
#define ustrncmp(a,b,c)	strncmp((char*)(a), (char*)(b), c)

/* regcomp() flags accumulated from conformance and options */
int		reflags = 0;

/* number of the current input record; bumped by readline() */
int		recno = 0;

/* nonprint option: set by -n or a script starting with "#n" */
int		nflag = 0;

/* command q has been executed; readline() stops reading once set */
int		qflag = 0;

/* a substitution has occurred; readline() clears it for each record */
int		sflag = 0;

/* -b: strip leading blanks from c,a,i <text> */
int		bflag = 0;

/* -u: unbuffered output */
int		uflag = 0;

/* CC_NATIVE => CC_ASCII map, obtained from ccmap() in main() */
unsigned char	*map;
81
82 void
grow(Text * t,word n)83 grow(Text *t, word n)
84 {
85 word w = t->w - t->s;
86 word e = t->e - t->s + (n/SF_BUFSIZE+1)*SF_BUFSIZE;
87 t->s = oldof(t->s, unsigned char, e, 0);
88 if(t->s == 0)
89 error(3, "out of space");
90 t->w = t->s + w;
91 t->e = t->s + e;
92 }
93
94 /* BUG: a segment that ends with a comment whose
95 last character is \ causes a diagnostic */
96
97 void
safescript(Text * t)98 safescript(Text *t)
99 {
100 if(t->w > t->s+1 && t->w[-2] == '\\')
101 error(1, "script segment ends with \\");
102 }
103
104 static Sfio_t *
aopen(char * s,int level)105 aopen(char *s, int level)
106 {
107 Sfio_t *f = sfopen(NiL, s, "r");
108 if(f == 0)
109 error(ERROR_SYSTEM|level, "%s: cannot open", s);
110 if (uflag)
111 sfsetbuf(f, 0, 0);
112 return f;
113 }
114
115 static void
readscript(Text * t,char * s)116 readscript(Text *t, char *s)
117 {
118 word n;
119 Sfio_t *f = aopen(s, 3);
120 for(;;) {
121 assure(t, 4);
122 n = sfread(f, t->w, t->e - t->w - 3);
123 if(n <= 0)
124 break;
125 t->w += n;
126 }
127 sfclose(f);
128 if(t->w > t->s && t->w[-1] != '\n') {
129 *t->w++ = '\n';
130 error(1, "newline appended to script segment");
131 }
132 *t->w = 0;
133 safescript(t);
134 }
135
136 static void
copyscript(Text * t,const unsigned char * s)137 copyscript(Text *t, const unsigned char *s)
138 {
139 do {
140 assure(t, 2);
141 } while(*t->w++ = *s++);
142 if(--t->w > t->s && t->w[-1] != '\n') {
143 *t->w++ = '\n';
144 *t->w = 0;
145 }
146 safescript(t);
147 }
148
149 /* DATA INPUT */
150
151 struct {
152 int iargc; /* # of files not fully read */
153 char **iargv; /* current file */
154 Sfio_t *ifile; /* current input file */
155 } input;
156
157 int
readline(Text * t)158 readline(Text *t)
159 {
160 char* s;
161 word c;
162
163 coda();
164 if (qflag || input.iargc <= 0)
165 return 0;
166 for (;;)
167 {
168 if (s = sfgetr(input.ifile, '\n', 1))
169 {
170 c = sfvalue(input.ifile);
171 break;
172 }
173 if (s = sfgetr(input.ifile, '\n', -1))
174 {
175 c = sfvalue(input.ifile) + 1;
176 error(1, "newline appended");
177 break;
178 }
179 error_info.file = 0;
180 error_info.line = 0;
181 sfclose(input.ifile);
182 do
183 {
184 if (--input.iargc <= 0)
185 return 0;
186 } while (!(input.ifile = aopen(*++input.iargv, 2)));
187 error_info.file = *input.iargv;
188 }
189 assure(t, c);
190 memcpy(t->w, s, c);
191 t->w += c - 1;
192 error_info.line++;
193 recno++;
194 sflag = 0;
195 return 1;
196 }
197
198 int
ateof(void)199 ateof(void)
200 {
201 int c;
202
203 if (input.iargc == 1)
204 {
205 if ((c = sfgetc(input.ifile)) != EOF)
206 sfungetc(input.ifile, c);
207 else
208 input.iargc = 0;
209 }
210 return input.iargc <= 0;
211 }
212
213 static int
initinput(int argc,char ** argv)214 initinput(int argc, char **argv)
215 {
216 input.iargc = argc;
217 input.iargv = argv;
218 if(input.iargc == 0) {
219 input.iargc = 1; /* for ateof() */
220 input.ifile = sfstdin;
221 } else {
222 while (!(input.ifile = aopen(*input.iargv, 2))) {
223 if (--input.iargc <= 0)
224 return 0;
225 ++input.iargv;
226 }
227 error_info.file = *input.iargv;
228 }
229 return 1;
230 }
231
232 #if DEBUG & 1
233
234 /* debugging code 1; compile and execute stubs.
235 simply prints the already collected script and
236 prints numbered input lines */
237
238 void
compile(Text * script,Text * t)239 compile(Text *script, Text *t)
240 {
241 unsigned char *s = t->s;
242 assure(script, 1);
243 *script->w++ = 0;
244 while(*s) sfputc(sfstdout, *s++);
245 }
246
247 void
execute(Text * x,Text * y)248 execute(Text *x, Text *y)
249 {
250 x = x;
251 sfprintf(sfstdout, "%d: %s", recno, y->s);
252 }
253
254 #endif
255
256 int
main(int argc,char ** argv)257 main(int argc, char **argv)
258 {
259 int c;
260 static Text script;
261 static Text data;
262 error_info.id = "sed";
263 if (!conformance(0, 0))
264 reflags = REG_LENIENT;
265 map = ccmap(CC_NATIVE, CC_ASCII);
266 while (c = optget(argv, usage))
267 switch (c)
268 {
269 case 'A':
270 case 'X':
271 reflags |= REG_AUGMENTED;
272 break;
273 case 'E':
274 case 'r':
275 reflags |= REG_EXTENDED;
276 break;
277 case 'O':
278 reflags |= REG_LENIENT;
279 break;
280 case 'S':
281 reflags &= ~REG_LENIENT;
282 break;
283 case 'b':
284 bflag++;
285 break;
286 case 'e':
287 copyscript(&data, (unsigned char*)opt_info.arg);
288 break;
289 case 'f':
290 readscript(&data, opt_info.arg);
291 break;
292 case 'm':
293 reflags |= REG_MULTIREF;
294 break;
295 case 'n':
296 nflag++;
297 break;
298 case 'd':
299 break;
300 case 'u':
301 uflag++;
302 break;
303 case '?':
304 error(ERROR_USAGE|4, "%s", opt_info.arg);
305 break;
306 case ':':
307 error(2, "%s", opt_info.arg);
308 break;
309 }
310 if (error_info.errors)
311 error(ERROR_USAGE|4, "%s", optusage(NiL));
312 argv += opt_info.index;
313 argc -= opt_info.index;
314 if(data.s == 0) {
315 if(!*argv)
316 error(3, "no script");
317 copyscript(&data, (unsigned char*)*argv++);
318 argc--;
319 }
320 if(ustrncmp(data.s, "#n", 2) == 0)
321 nflag = 1;
322 copyscript(&data, (const unsigned char*)"\n\n"); /* e.g. s/a/\ */
323 compile(&script, &data);
324 #if DEBUG
325 printscript(&script);
326 #endif
327 if (uflag)
328 sfsetbuf(sfstdout, 0, 0);
329 if (initinput(argc, argv))
330 for(;;) {
331 data.w = data.s;
332 if(!readline(&data))
333 break;
334 execute(&script, &data);
335 }
336 if(sfclose(sfstdout) < 0)
337 error(ERROR_SYSTEM|3, stdouterr);
338 return error_info.errors != 0;
339 }
340