1
2 /* Compiler implementation of the D programming language
3 * Copyright (C) 1999-2019 by The D Language Foundation, All Rights Reserved
4 * written by Walter Bright
5 * http://www.digitalmars.com
6 * Distributed under the Boost Software License, Version 1.0.
7 * http://www.boost.org/LICENSE_1_0.txt
8 * https://github.com/D-Programming-Language/dmd/blob/master/src/macro.c
9 */
10
11 /* Simple macro text processor.
12 */
13
14 #include "root/dsystem.h"
15
16 #include "mars.h"
17 #include "errors.h"
18 #include "root/rmem.h"
19 #include "root/root.h"
20
21 #include "macro.h"
22
23 bool isIdStart(const utf8_t *p);
24 bool isIdTail(const utf8_t *p);
25 int utfStride(const utf8_t *p);
26
/**
 * Return a copy of p[0 .. len] in storage obtained from mem.xmalloc().
 *
 * Params:
 *      p       bytes to copy; may be NULL when len is 0
 *      len     number of bytes to copy
 * Returns:
 *      whatever mem.xmalloc(len) returned, filled with the copied bytes.
 *      The caller releases it with mem.xfree().
 */
utf8_t *memdup(const utf8_t *p, size_t len)
{
    utf8_t *copy = (utf8_t *)mem.xmalloc(len);

    /* Macro::expand() recurses with (NULL, 0) and duplicates that argument.
     * memcpy() requires valid pointers even for a zero length, so skip the
     * call entirely when there is nothing to copy.
     */
    if (len)
        memcpy(copy, p, len);
    return copy;
}
31
Macro(const utf8_t * name,size_t namelen,const utf8_t * text,size_t textlen)32 Macro::Macro(const utf8_t *name, size_t namelen, const utf8_t *text, size_t textlen)
33 {
34 next = NULL;
35
36 this->name = name;
37 this->namelen = namelen;
38
39 this->text = text;
40 this->textlen = textlen;
41 inuse = 0;
42 }
43
44
search(const utf8_t * name,size_t namelen)45 Macro *Macro::search(const utf8_t *name, size_t namelen)
46 { Macro *table;
47
48 //printf("Macro::search(%.*s)\n", namelen, name);
49 for (table = this; table; table = table->next)
50 {
51 if (table->namelen == namelen &&
52 memcmp(table->name, name, namelen) == 0)
53 {
54 //printf("\tfound %d\n", table->textlen);
55 break;
56 }
57 }
58 return table;
59 }
60
define(Macro ** ptable,const utf8_t * name,size_t namelen,const utf8_t * text,size_t textlen)61 Macro *Macro::define(Macro **ptable, const utf8_t *name, size_t namelen, const utf8_t *text, size_t textlen)
62 {
63 //printf("Macro::define('%.*s' = '%.*s')\n", namelen, name, textlen, text);
64
65 Macro *table;
66
67 //assert(ptable);
68 for (table = *ptable; table; table = table->next)
69 {
70 if (table->namelen == namelen &&
71 memcmp(table->name, name, namelen) == 0)
72 {
73 table->text = text;
74 table->textlen = textlen;
75 return table;
76 }
77 }
78 table = new Macro(name, namelen, text, textlen);
79 table->next = *ptable;
80 *ptable = table;
81 return table;
82 }
83
84 /**********************************************************
85 * Given buffer p[0..end], extract argument marg[0..marglen].
86 * Params:
87 * n 0: get entire argument
88 * 1..9: get nth argument
89 * -1: get 2nd through end
90 */
91
extractArgN(const utf8_t * p,size_t end,const utf8_t ** pmarg,size_t * pmarglen,int n)92 size_t extractArgN(const utf8_t *p, size_t end, const utf8_t **pmarg, size_t *pmarglen, int n)
93 {
94 /* Scan forward for matching right parenthesis.
95 * Nest parentheses.
96 * Skip over "..." and '...' strings inside HTML tags.
97 * Skip over <!-- ... --> comments.
98 * Skip over previous macro insertions
99 * Set marglen.
100 */
101 unsigned parens = 1;
102 unsigned char instring = 0;
103 unsigned incomment = 0;
104 unsigned intag = 0;
105 unsigned inexp = 0;
106 int argn = 0;
107
108 size_t v = 0;
109
110 Largstart:
111 // Skip first space, if any, to find the start of the macro argument
112 if (n != 1 && v < end && isspace(p[v]))
113 v++;
114 *pmarg = p + v;
115
116 for (; v < end; v++)
117 { utf8_t c = p[v];
118
119 switch (c)
120 {
121 case ',':
122 if (!inexp && !instring && !incomment && parens == 1)
123 {
124 argn++;
125 if (argn == 1 && n == -1)
126 { v++;
127 goto Largstart;
128 }
129 if (argn == n)
130 break;
131 if (argn + 1 == n)
132 { v++;
133 goto Largstart;
134 }
135 }
136 continue;
137
138 case '(':
139 if (!inexp && !instring && !incomment)
140 parens++;
141 continue;
142
143 case ')':
144 if (!inexp && !instring && !incomment && --parens == 0)
145 {
146 break;
147 }
148 continue;
149
150 case '"':
151 case '\'':
152 if (!inexp && !incomment && intag)
153 {
154 if (c == instring)
155 instring = 0;
156 else if (!instring)
157 instring = c;
158 }
159 continue;
160
161 case '<':
162 if (!inexp && !instring && !incomment)
163 {
164 if (v + 6 < end &&
165 p[v + 1] == '!' &&
166 p[v + 2] == '-' &&
167 p[v + 3] == '-')
168 {
169 incomment = 1;
170 v += 3;
171 }
172 else if (v + 2 < end &&
173 isalpha(p[v + 1]))
174 intag = 1;
175 }
176 continue;
177
178 case '>':
179 if (!inexp)
180 intag = 0;
181 continue;
182
183 case '-':
184 if (!inexp &&
185 !instring &&
186 incomment &&
187 v + 2 < end &&
188 p[v + 1] == '-' &&
189 p[v + 2] == '>')
190 {
191 incomment = 0;
192 v += 2;
193 }
194 continue;
195
196 case 0xFF:
197 if (v + 1 < end)
198 {
199 if (p[v + 1] == '{')
200 inexp++;
201 else if (p[v + 1] == '}')
202 inexp--;
203 }
204 continue;
205
206 default:
207 continue;
208 }
209 break;
210 }
211 if (argn == 0 && n == -1)
212 *pmarg = p + v;
213 *pmarglen = p + v - *pmarg;
214 //printf("extractArg%d('%.*s') = '%.*s'\n", n, end, p, *pmarglen, *pmarg);
215 return v;
216 }
217
218
219 /*****************************************************
220 * Expand macro in place in buf.
221 * Only look at the text in buf from start to end.
222 */
223
expand(OutBuffer * buf,size_t start,size_t * pend,const utf8_t * arg,size_t arglen)224 void Macro::expand(OutBuffer *buf, size_t start, size_t *pend,
225 const utf8_t *arg, size_t arglen)
226 {
227 // limit recursive expansion
228 static int nest;
229 static const int nestLimit = 1000;
230 if (nest > nestLimit)
231 {
232 error(Loc(), "DDoc macro expansion limit exceeded; more than %d "
233 "expansions.", nestLimit);
234 return;
235 }
236 nest++;
237
238 size_t end = *pend;
239 assert(start <= end);
240 assert(end <= buf->offset);
241
242 /* First pass - replace $0
243 */
244 arg = memdup(arg, arglen);
245 for (size_t u = start; u + 1 < end; )
246 {
247 utf8_t *p = (utf8_t *)buf->data; // buf->data is not loop invariant
248
249 /* Look for $0, but not $$0, and replace it with arg.
250 */
251 if (p[u] == '$' && (isdigit(p[u + 1]) || p[u + 1] == '+'))
252 {
253 if (u > start && p[u - 1] == '$')
254 { // Don't expand $$0, but replace it with $0
255 buf->remove(u - 1, 1);
256 end--;
257 u += 1; // now u is one past the closing '1'
258 continue;
259 }
260
261 utf8_t c = p[u + 1];
262 int n = (c == '+') ? -1 : c - '0';
263
264 const utf8_t *marg;
265 size_t marglen;
266 if (n == 0)
267 {
268 marg = arg;
269 marglen = arglen;
270 }
271 else
272 extractArgN(arg, arglen, &marg, &marglen, n);
273 if (marglen == 0)
274 { // Just remove macro invocation
275 //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], marglen, marg);
276 buf->remove(u, 2);
277 end -= 2;
278 }
279 else if (c == '+')
280 {
281 // Replace '$+' with 'arg'
282 //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], marglen, marg);
283 buf->remove(u, 2);
284 buf->insert(u, marg, marglen);
285 end += marglen - 2;
286
287 // Scan replaced text for further expansion
288 size_t mend = u + marglen;
289 expand(buf, u, &mend, NULL, 0);
290 end += mend - (u + marglen);
291 u = mend;
292 }
293 else
294 {
295 // Replace '$1' with '\xFF{arg\xFF}'
296 //printf("Replacing '$%c' with '\xFF{%.*s\xFF}'\n", p[u + 1], marglen, marg);
297 buf->data[u] = 0xFF;
298 buf->data[u + 1] = '{';
299 buf->insert(u + 2, marg, marglen);
300 buf->insert(u + 2 + marglen, (const char *)"\xFF}", 2);
301 end += -2 + 2 + marglen + 2;
302
303 // Scan replaced text for further expansion
304 size_t mend = u + 2 + marglen;
305 expand(buf, u + 2, &mend, NULL, 0);
306 end += mend - (u + 2 + marglen);
307 u = mend;
308 }
309 //printf("u = %d, end = %d\n", u, end);
310 //printf("#%.*s#\n", end, &buf->data[0]);
311 continue;
312 }
313
314 u++;
315 }
316
317 /* Second pass - replace other macros
318 */
319 for (size_t u = start; u + 4 < end; )
320 {
321 utf8_t *p = (utf8_t *)buf->data; // buf->data is not loop invariant
322
323 /* A valid start of macro expansion is $(c, where c is
324 * an id start character, and not $$(c.
325 */
326 if (p[u] == '$' && p[u + 1] == '(' && isIdStart(p+u+2))
327 {
328 //printf("\tfound macro start '%c'\n", p[u + 2]);
329 utf8_t *name = p + u + 2;
330 size_t namelen = 0;
331
332 const utf8_t *marg;
333 size_t marglen;
334
335 size_t v;
336 /* Scan forward to find end of macro name and
337 * beginning of macro argument (marg).
338 */
339 for (v = u + 2; v < end; v+=utfStride(p+v))
340 {
341
342 if (!isIdTail(p+v))
343 { // We've gone past the end of the macro name.
344 namelen = v - (u + 2);
345 break;
346 }
347 }
348
349 v += extractArgN(p + v, end - v, &marg, &marglen, 0);
350 assert(v <= end);
351
352 if (v < end)
353 { // v is on the closing ')'
354 if (u > start && p[u - 1] == '$')
355 { // Don't expand $$(NAME), but replace it with $(NAME)
356 buf->remove(u - 1, 1);
357 end--;
358 u = v; // now u is one past the closing ')'
359 continue;
360 }
361
362 Macro *m = search(name, namelen);
363
364 if (!m)
365 {
366 static const char undef[] = "DDOC_UNDEFINED_MACRO";
367 m = search((const utf8_t *)undef, strlen(undef));
368 if (m)
369 {
370 // Macro was not defined, so this is an expansion of
371 // DDOC_UNDEFINED_MACRO. Prepend macro name to args.
372 // marg = name[ ] ~ "," ~ marg[ ];
373 if (marglen)
374 {
375 utf8_t *q = (utf8_t *)mem.xmalloc(namelen + 1 + marglen);
376 assert(q);
377 memcpy(q, name, namelen);
378 q[namelen] = ',';
379 memcpy(q + namelen + 1, marg, marglen);
380 marg = q;
381 marglen += namelen + 1;
382 }
383 else
384 {
385 marg = name;
386 marglen = namelen;
387 }
388 }
389 }
390
391 if (m)
392 {
393 if (m->inuse && marglen == 0)
394 { // Remove macro invocation
395 buf->remove(u, v + 1 - u);
396 end -= v + 1 - u;
397 }
398 else if (m->inuse &&
399 ((arglen == marglen && memcmp(arg, marg, arglen) == 0) ||
400 (arglen + 4 == marglen &&
401 marg[0] == 0xFF &&
402 marg[1] == '{' &&
403 memcmp(arg, marg + 2, arglen) == 0 &&
404 marg[marglen - 2] == 0xFF &&
405 marg[marglen - 1] == '}'
406 )
407 )
408 )
409 {
410 /* Recursive expansion:
411 * marg is same as arg (with blue paint added)
412 * Just leave in place.
413 */
414 }
415 else
416 {
417 //printf("\tmacro '%.*s'(%.*s) = '%.*s'\n", m->namelen, m->name, marglen, marg, m->textlen, m->text);
418 marg = memdup(marg, marglen);
419 // Insert replacement text
420 buf->spread(v + 1, 2 + m->textlen + 2);
421 buf->data[v + 1] = 0xFF;
422 buf->data[v + 2] = '{';
423 memcpy(buf->data + v + 3, m->text, m->textlen);
424 buf->data[v + 3 + m->textlen] = 0xFF;
425 buf->data[v + 3 + m->textlen + 1] = '}';
426
427 end += 2 + m->textlen + 2;
428
429 // Scan replaced text for further expansion
430 m->inuse++;
431 size_t mend = v + 1 + 2+m->textlen+2;
432 expand(buf, v + 1, &mend, marg, marglen);
433 end += mend - (v + 1 + 2+m->textlen+2);
434 m->inuse--;
435
436 buf->remove(u, v + 1 - u);
437 end -= v + 1 - u;
438 u += mend - (v + 1);
439 mem.xfree(const_cast<utf8_t *>(marg));
440 //printf("u = %d, end = %d\n", u, end);
441 //printf("#%.*s#\n", end - u, &buf->data[u]);
442 continue;
443 }
444 }
445 else
446 {
447 // Replace $(NAME) with nothing
448 buf->remove(u, v + 1 - u);
449 end -= (v + 1 - u);
450 continue;
451 }
452 }
453 }
454 u++;
455 }
456 mem.xfree(const_cast<utf8_t *>(arg));
457 *pend = end;
458 nest--;
459 }
460