1 
2 /* Compiler implementation of the D programming language
3  * Copyright (C) 1999-2019 by The D Language Foundation, All Rights Reserved
4  * written by Walter Bright
5  * http://www.digitalmars.com
6  * Distributed under the Boost Software License, Version 1.0.
7  * http://www.boost.org/LICENSE_1_0.txt
8  * https://github.com/D-Programming-Language/dmd/blob/master/src/macro.c
9  */
10 
11 /* Simple macro text processor.
12  */
13 
14 #include "root/dsystem.h"
15 
16 #include "mars.h"
17 #include "errors.h"
18 #include "root/rmem.h"
19 #include "root/root.h"
20 
21 #include "macro.h"
22 
23 bool isIdStart(const utf8_t *p);
24 bool isIdTail(const utf8_t *p);
25 int utfStride(const utf8_t *p);
26 
/**********************************************************
 * Make a heap copy of the bytes p[0..len].
 * Params:
 *      p       first byte to copy; not read when len is 0
 *      len     number of bytes to copy
 * Returns:
 *      the block obtained from mem.xmalloc(len), holding the copied
 *      bytes. Macro::expand() releases such copies with mem.xfree().
 */
utf8_t *memdup(const utf8_t *p, size_t len)
{
    utf8_t *copy = (utf8_t *)mem.xmalloc(len);

    // memcpy() needs valid pointers even for a zero-length copy, and
    // Macro::expand() duplicates a null, empty argument whenever it
    // recurses, so only copy when there is something to copy.
    if (len != 0)
        memcpy(copy, p, len);
    return copy;
}
31 
Macro(const utf8_t * name,size_t namelen,const utf8_t * text,size_t textlen)32 Macro::Macro(const utf8_t *name, size_t namelen, const utf8_t *text, size_t textlen)
33 {
34     next = NULL;
35 
36     this->name = name;
37     this->namelen = namelen;
38 
39     this->text = text;
40     this->textlen = textlen;
41     inuse = 0;
42 }
43 
44 
search(const utf8_t * name,size_t namelen)45 Macro *Macro::search(const utf8_t *name, size_t namelen)
46 {   Macro *table;
47 
48     //printf("Macro::search(%.*s)\n", namelen, name);
49     for (table = this; table; table = table->next)
50     {
51         if (table->namelen == namelen &&
52             memcmp(table->name, name, namelen) == 0)
53         {
54             //printf("\tfound %d\n", table->textlen);
55             break;
56         }
57     }
58     return table;
59 }
60 
define(Macro ** ptable,const utf8_t * name,size_t namelen,const utf8_t * text,size_t textlen)61 Macro *Macro::define(Macro **ptable, const utf8_t *name, size_t namelen, const utf8_t *text, size_t textlen)
62 {
63     //printf("Macro::define('%.*s' = '%.*s')\n", namelen, name, textlen, text);
64 
65     Macro *table;
66 
67     //assert(ptable);
68     for (table = *ptable; table; table = table->next)
69     {
70         if (table->namelen == namelen &&
71             memcmp(table->name, name, namelen) == 0)
72         {
73             table->text = text;
74             table->textlen = textlen;
75             return table;
76         }
77     }
78     table = new Macro(name, namelen, text, textlen);
79     table->next = *ptable;
80     *ptable = table;
81     return table;
82 }
83 
84 /**********************************************************
85  * Given buffer p[0..end], extract argument marg[0..marglen].
86  * Params:
87  *      n       0:      get entire argument
88  *              1..9:   get nth argument
89  *              -1:     get 2nd through end
90  */
91 
extractArgN(const utf8_t * p,size_t end,const utf8_t ** pmarg,size_t * pmarglen,int n)92 size_t extractArgN(const utf8_t *p, size_t end, const utf8_t **pmarg, size_t *pmarglen, int n)
93 {
94     /* Scan forward for matching right parenthesis.
95      * Nest parentheses.
96      * Skip over "..." and '...' strings inside HTML tags.
97      * Skip over <!-- ... --> comments.
98      * Skip over previous macro insertions
99      * Set marglen.
100      */
101     unsigned parens = 1;
102     unsigned char instring = 0;
103     unsigned incomment = 0;
104     unsigned intag = 0;
105     unsigned inexp = 0;
106     int argn = 0;
107 
108     size_t v = 0;
109 
110   Largstart:
111     // Skip first space, if any, to find the start of the macro argument
112     if (n != 1 && v < end && isspace(p[v]))
113         v++;
114     *pmarg = p + v;
115 
116     for (; v < end; v++)
117     {   utf8_t c = p[v];
118 
119         switch (c)
120         {
121             case ',':
122                 if (!inexp && !instring && !incomment && parens == 1)
123                 {
124                     argn++;
125                     if (argn == 1 && n == -1)
126                     {   v++;
127                         goto Largstart;
128                     }
129                     if (argn == n)
130                         break;
131                     if (argn + 1 == n)
132                     {   v++;
133                         goto Largstart;
134                     }
135                 }
136                 continue;
137 
138             case '(':
139                 if (!inexp && !instring && !incomment)
140                     parens++;
141                 continue;
142 
143             case ')':
144                 if (!inexp && !instring && !incomment && --parens == 0)
145                 {
146                     break;
147                 }
148                 continue;
149 
150             case '"':
151             case '\'':
152                 if (!inexp && !incomment && intag)
153                 {
154                     if (c == instring)
155                         instring = 0;
156                     else if (!instring)
157                         instring = c;
158                 }
159                 continue;
160 
161             case '<':
162                 if (!inexp && !instring && !incomment)
163                 {
164                     if (v + 6 < end &&
165                         p[v + 1] == '!' &&
166                         p[v + 2] == '-' &&
167                         p[v + 3] == '-')
168                     {
169                         incomment = 1;
170                         v += 3;
171                     }
172                     else if (v + 2 < end &&
173                         isalpha(p[v + 1]))
174                         intag = 1;
175                 }
176                 continue;
177 
178             case '>':
179                 if (!inexp)
180                     intag = 0;
181                 continue;
182 
183             case '-':
184                 if (!inexp &&
185                     !instring &&
186                     incomment &&
187                     v + 2 < end &&
188                     p[v + 1] == '-' &&
189                     p[v + 2] == '>')
190                 {
191                     incomment = 0;
192                     v += 2;
193                 }
194                 continue;
195 
196             case 0xFF:
197                 if (v + 1 < end)
198                 {
199                     if (p[v + 1] == '{')
200                         inexp++;
201                     else if (p[v + 1] == '}')
202                         inexp--;
203                 }
204                 continue;
205 
206             default:
207                 continue;
208         }
209         break;
210     }
211     if (argn == 0 && n == -1)
212         *pmarg = p + v;
213     *pmarglen = p + v - *pmarg;
214     //printf("extractArg%d('%.*s') = '%.*s'\n", n, end, p, *pmarglen, *pmarg);
215     return v;
216 }
217 
218 
219 /*****************************************************
220  * Expand macro in place in buf.
221  * Only look at the text in buf from start to end.
222  */
223 
expand(OutBuffer * buf,size_t start,size_t * pend,const utf8_t * arg,size_t arglen)224 void Macro::expand(OutBuffer *buf, size_t start, size_t *pend,
225         const utf8_t *arg, size_t arglen)
226 {
227     // limit recursive expansion
228     static int nest;
229     static const int nestLimit = 1000;
230     if (nest > nestLimit)
231     {
232         error(Loc(), "DDoc macro expansion limit exceeded; more than %d "
233             "expansions.", nestLimit);
234         return;
235     }
236     nest++;
237 
238     size_t end = *pend;
239     assert(start <= end);
240     assert(end <= buf->offset);
241 
242     /* First pass - replace $0
243      */
244     arg = memdup(arg, arglen);
245     for (size_t u = start; u + 1 < end; )
246     {
247         utf8_t *p = (utf8_t *)buf->data;   // buf->data is not loop invariant
248 
249         /* Look for $0, but not $$0, and replace it with arg.
250          */
251         if (p[u] == '$' && (isdigit(p[u + 1]) || p[u + 1] == '+'))
252         {
253             if (u > start && p[u - 1] == '$')
254             {   // Don't expand $$0, but replace it with $0
255                 buf->remove(u - 1, 1);
256                 end--;
257                 u += 1; // now u is one past the closing '1'
258                 continue;
259             }
260 
261             utf8_t c = p[u + 1];
262             int n = (c == '+') ? -1 : c - '0';
263 
264             const utf8_t *marg;
265             size_t marglen;
266             if (n == 0)
267             {
268                 marg = arg;
269                 marglen = arglen;
270             }
271             else
272                 extractArgN(arg, arglen, &marg, &marglen, n);
273             if (marglen == 0)
274             {   // Just remove macro invocation
275                 //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], marglen, marg);
276                 buf->remove(u, 2);
277                 end -= 2;
278             }
279             else if (c == '+')
280             {
281                 // Replace '$+' with 'arg'
282                 //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], marglen, marg);
283                 buf->remove(u, 2);
284                 buf->insert(u, marg, marglen);
285                 end += marglen - 2;
286 
287                 // Scan replaced text for further expansion
288                 size_t mend = u + marglen;
289                 expand(buf, u, &mend, NULL, 0);
290                 end += mend - (u + marglen);
291                 u = mend;
292             }
293             else
294             {
295                 // Replace '$1' with '\xFF{arg\xFF}'
296                 //printf("Replacing '$%c' with '\xFF{%.*s\xFF}'\n", p[u + 1], marglen, marg);
297                 buf->data[u] = 0xFF;
298                 buf->data[u + 1] = '{';
299                 buf->insert(u + 2, marg, marglen);
300                 buf->insert(u + 2 + marglen, (const char *)"\xFF}", 2);
301                 end += -2 + 2 + marglen + 2;
302 
303                 // Scan replaced text for further expansion
304                 size_t mend = u + 2 + marglen;
305                 expand(buf, u + 2, &mend, NULL, 0);
306                 end += mend - (u + 2 + marglen);
307                 u = mend;
308             }
309             //printf("u = %d, end = %d\n", u, end);
310             //printf("#%.*s#\n", end, &buf->data[0]);
311             continue;
312         }
313 
314         u++;
315     }
316 
317     /* Second pass - replace other macros
318      */
319     for (size_t u = start; u + 4 < end; )
320     {
321         utf8_t *p = (utf8_t *)buf->data;   // buf->data is not loop invariant
322 
323         /* A valid start of macro expansion is $(c, where c is
324          * an id start character, and not $$(c.
325          */
326         if (p[u] == '$' && p[u + 1] == '(' && isIdStart(p+u+2))
327         {
328             //printf("\tfound macro start '%c'\n", p[u + 2]);
329             utf8_t *name = p + u + 2;
330             size_t namelen = 0;
331 
332             const utf8_t *marg;
333             size_t marglen;
334 
335             size_t v;
336             /* Scan forward to find end of macro name and
337              * beginning of macro argument (marg).
338              */
339             for (v = u + 2; v < end; v+=utfStride(p+v))
340             {
341 
342                 if (!isIdTail(p+v))
343                 {   // We've gone past the end of the macro name.
344                     namelen = v - (u + 2);
345                     break;
346                 }
347             }
348 
349             v += extractArgN(p + v, end - v, &marg, &marglen, 0);
350             assert(v <= end);
351 
352             if (v < end)
353             {   // v is on the closing ')'
354                 if (u > start && p[u - 1] == '$')
355                 {   // Don't expand $$(NAME), but replace it with $(NAME)
356                     buf->remove(u - 1, 1);
357                     end--;
358                     u = v;      // now u is one past the closing ')'
359                     continue;
360                 }
361 
362                 Macro *m = search(name, namelen);
363 
364                 if (!m)
365                 {
366                     static const char undef[] = "DDOC_UNDEFINED_MACRO";
367                     m = search((const utf8_t *)undef, strlen(undef));
368                     if (m)
369                     {
370                         // Macro was not defined, so this is an expansion of
371                         //   DDOC_UNDEFINED_MACRO. Prepend macro name to args.
372                         // marg = name[ ] ~ "," ~ marg[ ];
373                         if (marglen)
374                         {
375                             utf8_t *q = (utf8_t *)mem.xmalloc(namelen + 1 + marglen);
376                             assert(q);
377                             memcpy(q, name, namelen);
378                             q[namelen] = ',';
379                             memcpy(q + namelen + 1, marg, marglen);
380                             marg = q;
381                             marglen += namelen + 1;
382                         }
383                         else
384                         {
385                             marg = name;
386                             marglen = namelen;
387                         }
388                     }
389                 }
390 
391                 if (m)
392                 {
393                     if (m->inuse && marglen == 0)
394                     {   // Remove macro invocation
395                         buf->remove(u, v + 1 - u);
396                         end -= v + 1 - u;
397                     }
398                     else if (m->inuse &&
399                              ((arglen == marglen && memcmp(arg, marg, arglen) == 0) ||
400                               (arglen + 4 == marglen &&
401                                marg[0] == 0xFF &&
402                                marg[1] == '{' &&
403                                memcmp(arg, marg + 2, arglen) == 0 &&
404                                marg[marglen - 2] == 0xFF &&
405                                marg[marglen - 1] == '}'
406                               )
407                              )
408                             )
409                     {
410                         /* Recursive expansion:
411                          *   marg is same as arg (with blue paint added)
412                          * Just leave in place.
413                          */
414                     }
415                     else
416                     {
417                         //printf("\tmacro '%.*s'(%.*s) = '%.*s'\n", m->namelen, m->name, marglen, marg, m->textlen, m->text);
418                         marg = memdup(marg, marglen);
419                         // Insert replacement text
420                         buf->spread(v + 1, 2 + m->textlen + 2);
421                         buf->data[v + 1] = 0xFF;
422                         buf->data[v + 2] = '{';
423                         memcpy(buf->data + v + 3, m->text, m->textlen);
424                         buf->data[v + 3 + m->textlen] = 0xFF;
425                         buf->data[v + 3 + m->textlen + 1] = '}';
426 
427                         end += 2 + m->textlen + 2;
428 
429                         // Scan replaced text for further expansion
430                         m->inuse++;
431                         size_t mend = v + 1 + 2+m->textlen+2;
432                         expand(buf, v + 1, &mend, marg, marglen);
433                         end += mend - (v + 1 + 2+m->textlen+2);
434                         m->inuse--;
435 
436                         buf->remove(u, v + 1 - u);
437                         end -= v + 1 - u;
438                         u += mend - (v + 1);
439                         mem.xfree(const_cast<utf8_t *>(marg));
440                         //printf("u = %d, end = %d\n", u, end);
441                         //printf("#%.*s#\n", end - u, &buf->data[u]);
442                         continue;
443                     }
444                 }
445                 else
446                 {
447                     // Replace $(NAME) with nothing
448                     buf->remove(u, v + 1 - u);
449                     end -= (v + 1 - u);
450                     continue;
451                 }
452             }
453         }
454         u++;
455     }
456     mem.xfree(const_cast<utf8_t *>(arg));
457     *pend = end;
458     nest--;
459 }
460