1 /* $NetBSD: cook.c,v 1.5 2020/05/25 20:47:34 christos Exp $ */
2
3 /**
4 * \file cook.c
5 *
6 * This file contains the routines that deal with processing quoted strings
7 * into an internal format.
8 *
9 * @addtogroup autoopts
10 * @{
11 */
12 /*
13 * This file is part of AutoOpts, a companion to AutoGen.
14 * AutoOpts is free software.
15 * AutoOpts is Copyright (C) 1992-2015 by Bruce Korb - all rights reserved
16 *
17 * AutoOpts is available under any one of two licenses. The license
18 * in use must be one of these two and the choice is under the control
19 * of the user of the license.
20 *
21 * The GNU Lesser General Public License, version 3 or later
22 * See the files "COPYING.lgplv3" and "COPYING.gplv3"
23 *
24 * The Modified Berkeley Software Distribution License
25 * See the file "COPYING.mbsd"
26 *
27 * These files have the following sha256 sums:
28 *
29 * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3
30 * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3
31 * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd
32 */
33
34 /* = = = START-STATIC-FORWARD = = = */
35 static bool
36 contiguous_quote(char ** pps, char * pq, int * lnct_p);
37 /* = = = END-STATIC-FORWARD = = = */
38
39 /*=export_func ao_string_cook_escape_char
40 * private:
41 *
42 * what: escape-process a string fragment
43 * arg: + char const * + pzScan + points to character after the escape +
44 * arg: + char * + pRes + Where to put the result byte +
45 * arg: + unsigned int + nl_ch + replacement char if scanned char is \n +
46 *
47 * ret-type: unsigned int
48 * ret-desc: The number of bytes consumed processing the escaped character.
49 *
50 * doc:
51 *
52 * This function converts "t" into "\t" and all your other favorite
53 * escapes, including numeric ones: hex and octal, too.
54 * The returned result tells the caller how far to advance the
55 * scan pointer (passed in). The default is to just pass through the
56 * escaped character and advance the scan by one.
57 *
58 * Some applications need to keep an escaped newline, others need to
59 * suppress it. This is accomplished by supplying a '\n' replacement
60 * character that is different from \n, if need be. For example, use
61 * 0x7F and never emit a 0x7F.
62 *
63 * err: Zero is returned if the escape is at the end of the input string.
64 =*/
65 unsigned int
ao_string_cook_escape_char(char const * pzIn,char * pRes,uint_t nl)66 ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
67 {
68 unsigned int res = 1;
69
70 switch (*pRes = *pzIn++) {
71 case NUL: /* NUL - end of input string */
72 return 0;
73 case '\r':
74 if (*pzIn != NL)
75 return 1;
76 res++;
77 /* FALLTHROUGH */
78 case NL: /* NL - emit newline */
79 *pRes = (char)nl;
80 return res;
81
82 case 'a': *pRes = '\a'; break;
83 case 'b': *pRes = '\b'; break;
84 case 'f': *pRes = '\f'; break;
85 case 'n': *pRes = NL; break;
86 case 'r': *pRes = '\r'; break;
87 case 't': *pRes = '\t'; break;
88 case 'v': *pRes = '\v'; break;
89
90 case 'x':
91 case 'X': /* HEX Escape */
92 if (IS_HEX_DIGIT_CHAR(*pzIn)) {
93 char z[4];
94 unsigned int ct = 0;
95
96 do {
97 z[ct] = pzIn[ct];
98 if (++ct >= 2)
99 break;
100 } while (IS_HEX_DIGIT_CHAR(pzIn[ct]));
101 z[ct] = NUL;
102 *pRes = (char)strtoul(z, NULL, 16);
103 return ct + 1;
104 }
105 break;
106
107 case '0': case '1': case '2': case '3':
108 case '4': case '5': case '6': case '7':
109 {
110 /*
111 * IF the character copied was an octal digit,
112 * THEN set the output character to an octal value.
113 * The 3 octal digit result might exceed 0xFF, so check it.
114 */
115 char z[4];
116 unsigned long val;
117 unsigned int ct = 0;
118
119 z[ct++] = *--pzIn;
120 while (IS_OCT_DIGIT_CHAR(pzIn[ct])) {
121 z[ct] = pzIn[ct];
122 if (++ct >= 3)
123 break;
124 }
125
126 z[ct] = NUL;
127 val = strtoul(z, NULL, 8);
128 if (val > 0xFF)
129 val = 0xFF;
130 *pRes = (char)val;
131 return ct;
132 }
133
134 default: /* quoted character is result character */;
135 }
136
137 return res;
138 }
139
140
141 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
142 *
143 * A quoted string has been found.
144 * Find the end of it and compress any escape sequences.
145 */
146 static bool
contiguous_quote(char ** pps,char * pq,int * lnct_p)147 contiguous_quote(char ** pps, char * pq, int * lnct_p)
148 {
149 char * ps = *pps + 1;
150
151 for (;;) {
152 while (IS_WHITESPACE_CHAR(*ps))
153 if (*(ps++) == NL)
154 (*lnct_p)++;
155
156 /*
157 * IF the next character is a quote character,
158 * THEN we will concatenate the strings.
159 */
160 switch (*ps) {
161 case '"':
162 case '\'':
163 *pq = *(ps++); /* assign new quote character and return */
164 *pps = ps;
165 return true;
166
167 case '/':
168 /*
169 * Allow for a comment embedded in the concatenated string.
170 */
171 switch (ps[1]) {
172 default:
173 *pps = NULL;
174 return false;
175
176 case '/':
177 /*
178 * Skip to end of line
179 */
180 ps = strchr(ps, NL);
181 if (ps == NULL) {
182 *pps = NULL;
183 return false;
184 }
185 break;
186
187 case '*':
188 {
189 char * p = strstr( ps+2, "*/" );
190 /*
191 * Skip to terminating star slash
192 */
193 if (p == NULL) {
194 *pps = NULL;
195 return false;
196 }
197
198 while (ps < p) {
199 if (*(ps++) == NL)
200 (*lnct_p)++;
201 }
202
203 ps = p + 2;
204 }
205 }
206 continue;
207
208 default:
209 /*
210 * The next non-whitespace character is not a quote.
211 * The series of quoted strings has come to an end.
212 */
213 *pps = ps;
214 return false;
215 }
216 }
217 }
218
219 /*=export_func ao_string_cook
220 * private:
221 *
222 * what: concatenate and escape-process strings
223 * arg: + char * + pzScan + The *MODIFIABLE* input buffer +
224 * arg: + int * + lnct_p + The (possibly NULL) pointer to a line count +
225 *
226 * ret-type: char *
227 * ret-desc: The address of the text following the processed strings.
228 * The return value is NULL if the strings are ill-formed.
229 *
230 * doc:
231 *
232 * A series of one or more quoted strings are concatenated together.
233 * If they are quoted with double quotes (@code{"}), then backslash
234 * escapes are processed per the C programming language. If they are
235 * single quote strings, then the backslashes are honored only when they
236 * precede another backslash or a single quote character.
237 *
238 * err: @code{NULL} is returned if the string(s) is/are mal-formed.
239 =*/
240 char *
ao_string_cook(char * pzScan,int * lnct_p)241 ao_string_cook(char * pzScan, int * lnct_p)
242 {
243 int l = 0;
244 char q = *pzScan;
245
246 /*
247 * It is a quoted string. Process the escape sequence characters
248 * (in the set "abfnrtv") and make sure we find a closing quote.
249 */
250 char * pzD = pzScan++;
251 char * pzS = pzScan;
252
253 if (lnct_p == NULL)
254 lnct_p = &l;
255
256 for (;;) {
257 /*
258 * IF the next character is the quote character, THEN we may end the
259 * string. We end it unless the next non-blank character *after* the
260 * string happens to also be a quote. If it is, then we will change
261 * our quote character to the new quote character and continue
262 * condensing text.
263 */
264 while (*pzS == q) {
265 *pzD = NUL; /* This is probably the end of the line */
266 if (! contiguous_quote(&pzS, &q, lnct_p))
267 return pzS;
268 }
269
270 /*
271 * We are inside a quoted string. Copy text.
272 */
273 switch (*(pzD++) = *(pzS++)) {
274 case NUL:
275 return NULL;
276
277 case NL:
278 (*lnct_p)++;
279 break;
280
281 case '\\':
282 /*
283 * IF we are escaping a new line,
284 * THEN drop both the escape and the newline from
285 * the result string.
286 */
287 if (*pzS == NL) {
288 pzS++;
289 pzD--;
290 (*lnct_p)++;
291 }
292
293 /*
294 * ELSE IF the quote character is '"' or '`',
295 * THEN we do the full escape character processing
296 */
297 else if (q != '\'') {
298 unsigned int ct;
299 ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
300 if (ct == 0)
301 return NULL;
302
303 pzS += ct;
304 } /* if (q != '\'') */
305
306 /*
307 * OTHERWISE, we only process "\\", "\'" and "\#" sequences.
308 * The latter only to easily hide preprocessing directives.
309 */
310 else switch (*pzS) {
311 case '\\':
312 case '\'':
313 case '#':
314 pzD[-1] = *pzS++;
315 }
316 } /* switch (*(pzD++) = *(pzS++)) */
317 } /* for (;;) */
318 }
319
320 /** @}
321 *
322 * Local Variables:
323 * mode: C
324 * c-file-style: "stroustrup"
325 * indent-tabs-mode: nil
326 * End:
327 * end of autoopts/cook.c */
328