1 
2 /* Compiler implementation of the D programming language
3  * Copyright (C) 1999-2019 by The D Language Foundation, All Rights Reserved
4  * written by Walter Bright
5  * http://www.digitalmars.com
6  * Distributed under the Boost Software License, Version 1.0.
7  * http://www.boost.org/LICENSE_1_0.txt
8  * https://github.com/D-Programming-Language/dmd/blob/master/src/doc.c
9  */
10 
11 // This implements the Ddoc capability.
12 
13 #include "root/dsystem.h"
14 #include "root/rmem.h"
15 #include "root/root.h"
16 #include "root/port.h"
17 #include "root/aav.h"
18 
19 #include "attrib.h"
20 #include "cond.h"
21 #include "mars.h"
22 #include "dsymbol.h"
23 #include "macro.h"
24 #include "template.h"
25 #include "lexer.h"
26 #include "aggregate.h"
27 #include "declaration.h"
28 #include "statement.h"
29 #include "enum.h"
30 #include "id.h"
31 #include "module.h"
32 #include "scope.h"
33 #include "hdrgen.h"
34 #include "doc.h"
35 #include "mtype.h"
36 #include "utf.h"
37 
38 void emitMemberComments(ScopeDsymbol *sds, OutBuffer *buf, Scope *sc);
39 void toDocBuffer(Dsymbol *s, OutBuffer *buf, Scope *sc);
40 void emitComment(Dsymbol *s, OutBuffer *buf, Scope *sc);
41 
/* Table of escape replacement strings used by the Ddoc generator.
 * NOTE(review): presumably indexed by character code, one slot per
 * possible byte value -- confirm against parseEscapes()/escapeChar().
 */
struct Escape
{
    const char *strings[256];               // one replacement-string slot per byte value

    const char *escapeChar(unsigned c);     // presumably yields the replacement for `c`; defined elsewhere
};
48 
49 class Section
50 {
51 public:
52     const utf8_t *name;
53     size_t namelen;
54 
55     const utf8_t *body;
56     size_t bodylen;
57 
58     int nooutput;
59 
60     virtual void write(Loc loc, DocComment *dc, Scope *sc, Dsymbols *a, OutBuffer *buf);
61 };
62 
63 class ParamSection : public Section
64 {
65 public:
66     void write(Loc loc, DocComment *dc, Scope *sc, Dsymbols *a, OutBuffer *buf);
67 };
68 
69 class MacroSection : public Section
70 {
71 public:
72     void write(Loc loc, DocComment *dc, Scope *sc, Dsymbols *a, OutBuffer *buf);
73 };
74 
75 typedef Array<Section *> Sections;
76 
77 struct DocComment
78 {
79     Sections sections;             // Section*[]
80 
81     Section *summary;
82     Section *copyright;
83     Section *macros;
84     Macro **pmacrotable;
85     Escape **pescapetable;
86 
87     Dsymbols a;
88 
DocCommentDocComment89     DocComment() :
90        summary(NULL), copyright(NULL), macros(NULL), pmacrotable(NULL), pescapetable(NULL)
91     { }
92 
93     static DocComment *parse(Dsymbol *s, const utf8_t *comment);
94     static void parseMacros(Escape **pescapetable, Macro **pmacrotable, const utf8_t *m, size_t mlen);
95     static void parseEscapes(Escape **pescapetable, const utf8_t *textstart, size_t textlen);
96 
97     void parseSections(const utf8_t *comment);
98     void writeSections(Scope *sc, Dsymbols *a, OutBuffer *buf);
99 };
100 
101 
102 int cmp(const char *stringz, const void *s, size_t slen);
103 int icmp(const char *stringz, const void *s, size_t slen);
104 bool isDitto(const utf8_t *comment);
105 const utf8_t *skipwhitespace(const utf8_t *p);
106 size_t skiptoident(OutBuffer *buf, size_t i);
107 size_t skippastident(OutBuffer *buf, size_t i);
108 size_t skippastURL(OutBuffer *buf, size_t i);
109 void highlightText(Scope *sc, Dsymbols *a, OutBuffer *buf, size_t offset);
110 void highlightCode(Scope *sc, Dsymbol *s, OutBuffer *buf, size_t offset);
111 void highlightCode(Scope *sc, Dsymbols *a, OutBuffer *buf, size_t offset);
112 void highlightCode2(Scope *sc, Dsymbols *a, OutBuffer *buf, size_t offset);
113 void highlightCode3(Scope *sc, OutBuffer *buf, const utf8_t *p, const utf8_t *pend);
114 TypeFunction *isTypeFunction(Dsymbol *s);
115 Parameter *isFunctionParameter(Dsymbols *a, const utf8_t *p, size_t len);
116 TemplateParameter *isTemplateParameter(Dsymbols *a, const utf8_t *p, size_t len);
117 
118 bool isIdStart(const utf8_t *p);
119 bool isCVariadicArg(const utf8_t *p, size_t len);
120 bool isIdTail(const utf8_t *p);
121 bool isIndentWS(const utf8_t *p);
122 int utfStride(const utf8_t *p);
123 
124 // Workaround for missing Parameter instance for variadic params. (it's unnecessary to instantiate one).
isCVariadicParameter(Dsymbols * a,const utf8_t * p,size_t len)125 bool isCVariadicParameter(Dsymbols *a, const utf8_t *p, size_t len)
126 {
127     for (size_t i = 0; i < a->dim; i++)
128     {
129         TypeFunction *tf = isTypeFunction((*a)[i]);
130         if (tf && tf->varargs == 1 && cmp("...", p, len) == 0)
131             return true;
132     }
133     return false;
134 }
135 
136 /****************************************************
137  */
isFunctionParameter(Dsymbol * s,const utf8_t * p,size_t len)138 static Parameter *isFunctionParameter(Dsymbol *s, const utf8_t *p, size_t len)
139 {
140     TypeFunction *tf = isTypeFunction(s);
141     if (tf && tf->parameters)
142     {
143         for (size_t k = 0; k < tf->parameters->dim; k++)
144         {
145             Parameter *fparam = (*tf->parameters)[k];
146             if (fparam->ident && cmp(fparam->ident->toChars(), p, len) == 0)
147             {
148                 return fparam;
149             }
150         }
151     }
152     return NULL;
153 }
154 
getEponymousMember(TemplateDeclaration * td)155 static Dsymbol *getEponymousMember(TemplateDeclaration *td)
156 {
157     if (!td->onemember)
158         return NULL;
159 
160     if (AggregateDeclaration *ad = td->onemember->isAggregateDeclaration())
161         return ad;
162     if (FuncDeclaration *fd = td->onemember->isFuncDeclaration())
163         return fd;
164     if (td->onemember->isEnumMember())
165         return NULL;    // Keep backward compatibility. See compilable/ddoc9.d
166     if (VarDeclaration *vd = td->onemember->isVarDeclaration())
167         return td->constraint ? NULL : vd;
168 
169     return NULL;
170 }
171 
172 /****************************************************
173  */
isEponymousFunctionParameter(Dsymbols * a,const utf8_t * p,size_t len)174 static Parameter *isEponymousFunctionParameter(Dsymbols *a, const utf8_t *p, size_t len)
175 {
176     for (size_t i = 0; i < a->dim; i++)
177     {
178         TemplateDeclaration *td = (*a)[i]->isTemplateDeclaration();
179         if (td && td->onemember)
180         {
181             /* Case 1: we refer to a template declaration inside the template
182 
183                /// ...ddoc...
184                template case1(T) {
185                  void case1(R)() {}
186                }
187              */
188             td = td->onemember->isTemplateDeclaration();
189         }
190         if (!td)
191         {
192             /* Case 2: we're an alias to a template declaration
193 
194                /// ...ddoc...
195                alias case2 = case1!int;
196              */
197             AliasDeclaration *ad = (*a)[i]->isAliasDeclaration();
198             if (ad && ad->aliassym)
199             {
200                 td = ad->aliassym->isTemplateDeclaration();
201             }
202         }
203         while (td)
204         {
205             Dsymbol *sym = getEponymousMember(td);
206             if (sym)
207             {
208                 Parameter *fparam = isFunctionParameter(sym, p, len);
209                 if (fparam)
210                 {
211                     return fparam;
212                 }
213             }
214             td = td->overnext;
215         }
216     }
217     return NULL;
218 }
219 
getEponymousParent(Dsymbol * s)220 static TemplateDeclaration *getEponymousParent(Dsymbol *s)
221 {
222     if (!s->parent)
223         return NULL;
224     TemplateDeclaration *td = s->parent->isTemplateDeclaration();
225     return (td && getEponymousMember(td)) ? td : NULL;
226 }
227 
/* Built-in Ddoc macro definitions. gendocfile() writes this text into its
 * macro buffer first, so DDOCFILE and command-line macro files appended
 * afterwards can override it.
 * Written as adjacent string literals, one per line of macro text.
 */
static const char ddoc_default[] =
    "DDOC =  <html><head>\n"
    "        <META http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\n"
    "        <title>$(TITLE)</title>\n"
    "        </head><body>\n"
    "        <h1>$(TITLE)</h1>\n"
    "        $(BODY)\n"
    "        <hr>$(SMALL Page generated by $(LINK2 http://dlang.org/ddoc.html, Ddoc). $(COPYRIGHT))\n"
    "        </body></html>\n"
    "\n"
    "B =     <b>$0</b>\n"
    "I =     <i>$0</i>\n"
    "U =     <u>$0</u>\n"
    "P =     <p>$0</p>\n"
    "DL =    <dl>$0</dl>\n"
    "DT =    <dt>$0</dt>\n"
    "DD =    <dd>$0</dd>\n"
    "TABLE = <table>$0</table>\n"
    "TR =    <tr>$0</tr>\n"
    "TH =    <th>$0</th>\n"
    "TD =    <td>$0</td>\n"
    "OL =    <ol>$0</ol>\n"
    "UL =    <ul>$0</ul>\n"
    "LI =    <li>$0</li>\n"
    "BIG =   <big>$0</big>\n"
    "SMALL = <small>$0</small>\n"
    "BR =    <br>\n"
    "LINK =  <a href=\"$0\">$0</a>\n"
    "LINK2 = <a href=\"$1\">$+</a>\n"
    "LPAREN= (\n"
    "RPAREN= )\n"
    "BACKTICK= `\n"
    "DOLLAR= $\n"
    "DEPRECATED= $0\n"
    "\n"
    "RED =   <font color=red>$0</font>\n"
    "BLUE =  <font color=blue>$0</font>\n"
    "GREEN = <font color=green>$0</font>\n"
    "YELLOW =<font color=yellow>$0</font>\n"
    "BLACK = <font color=black>$0</font>\n"
    "WHITE = <font color=white>$0</font>\n"
    "\n"
    "D_CODE = <pre class=\"d_code\">$0</pre>\n"
    "DDOC_BACKQUOTED = $(D_INLINECODE $0)\n"
    "D_INLINECODE = <pre style=\"display:inline;\" class=\"d_inline_code\">$0</pre>\n"
    "D_COMMENT = $(GREEN $0)\n"
    "D_STRING  = $(RED $0)\n"
    "D_KEYWORD = $(BLUE $0)\n"
    "D_PSYMBOL = $(U $0)\n"
    "D_PARAM   = $(I $0)\n"
    "\n"
    "DDOC_COMMENT   = <!-- $0 -->\n"
    "DDOC_DECL      = $(DT $(BIG $0))\n"
    "DDOC_DECL_DD   = $(DD $0)\n"
    "DDOC_DITTO     = $(BR)$0\n"
    "DDOC_SECTIONS  = $0\n"
    "DDOC_SUMMARY   = $0$(BR)$(BR)\n"
    "DDOC_DESCRIPTION = $0$(BR)$(BR)\n"
    "DDOC_AUTHORS   = $(B Authors:)$(BR)\n$0$(BR)$(BR)\n"
    "DDOC_BUGS      = $(RED BUGS:)$(BR)\n$0$(BR)$(BR)\n"
    "DDOC_COPYRIGHT = $(B Copyright:)$(BR)\n$0$(BR)$(BR)\n"
    "DDOC_DATE      = $(B Date:)$(BR)\n$0$(BR)$(BR)\n"
    "DDOC_DEPRECATED = $(RED Deprecated:)$(BR)\n$0$(BR)$(BR)\n"
    "DDOC_EXAMPLES  = $(B Examples:)$(BR)\n$0$(BR)$(BR)\n"
    "DDOC_HISTORY   = $(B History:)$(BR)\n$0$(BR)$(BR)\n"
    "DDOC_LICENSE   = $(B License:)$(BR)\n$0$(BR)$(BR)\n"
    "DDOC_RETURNS   = $(B Returns:)$(BR)\n$0$(BR)$(BR)\n"
    "DDOC_SEE_ALSO  = $(B See Also:)$(BR)\n$0$(BR)$(BR)\n"
    "DDOC_STANDARDS = $(B Standards:)$(BR)\n$0$(BR)$(BR)\n"
    "DDOC_THROWS    = $(B Throws:)$(BR)\n$0$(BR)$(BR)\n"
    "DDOC_VERSION   = $(B Version:)$(BR)\n$0$(BR)$(BR)\n"
    "DDOC_SECTION_H = $(B $0)$(BR)\n"
    "DDOC_SECTION   = $0$(BR)$(BR)\n"
    "DDOC_MEMBERS   = $(DL $0)\n"
    "DDOC_MODULE_MEMBERS = $(DDOC_MEMBERS $0)\n"
    "DDOC_CLASS_MEMBERS  = $(DDOC_MEMBERS $0)\n"
    "DDOC_STRUCT_MEMBERS = $(DDOC_MEMBERS $0)\n"
    "DDOC_ENUM_MEMBERS   = $(DDOC_MEMBERS $0)\n"
    "DDOC_TEMPLATE_MEMBERS = $(DDOC_MEMBERS $0)\n"
    "DDOC_ENUM_BASETYPE = $0\n"
    "DDOC_PARAMS    = $(B Params:)$(BR)\n$(TABLE $0)$(BR)\n"
    "DDOC_PARAM_ROW = $(TR $0)\n"
    "DDOC_PARAM_ID  = $(TD $0)\n"
    "DDOC_PARAM_DESC = $(TD $0)\n"
    "DDOC_BLANKLINE  = $(BR)$(BR)\n"
    "\n"
    "DDOC_ANCHOR     = <a name=\"$1\"></a>\n"
    "DDOC_PSYMBOL    = $(U $0)\n"
    "DDOC_PSUPER_SYMBOL = $(U $0)\n"
    "DDOC_KEYWORD    = $(B $0)\n"
    "DDOC_PARAM      = $(I $0)\n"
    "\n"
    "ESCAPES = /</&lt;/\n"
    "          />/&gt;/\n"
    "          /&/&amp;/\n";
324 
/* Macro call fragments that emitComment() wraps around a declaration's
 * signature text: "$(DDOC_DECL " ... ")\n".
 */
static const char ddoc_decl_s[] = "$(DDOC_DECL ";
static const char ddoc_decl_e[] = ")\n";

/* Macro call fragments that emitComment() wraps around a declaration's
 * description text: "$(DDOC_DECL_DD " ... ")\n".
 */
static const char ddoc_decl_dd_s[] = "$(DDOC_DECL_DD ";
static const char ddoc_decl_dd_e[] = ")\n";
330 
331 
332 /****************************************************
333  */
334 
gendocfile(Module * m)335 void gendocfile(Module *m)
336 {
337     static OutBuffer mbuf;
338     static int mbuf_done;
339 
340     OutBuffer buf;
341 
342     //printf("Module::gendocfile()\n");
343 
344     if (!mbuf_done)             // if not already read the ddoc files
345     {
346         mbuf_done = 1;
347 
348         // Use our internal default
349         mbuf.write(ddoc_default, strlen(ddoc_default));
350 
351         // Override with DDOCFILE specified in the sc.ini file
352         char *p = getenv("DDOCFILE");
353         if (p)
354             global.params.ddocfiles->shift(p);
355 
356         // Override with the ddoc macro files from the command line
357         for (size_t i = 0; i < global.params.ddocfiles->dim; i++)
358         {
359             FileName f((*global.params.ddocfiles)[i]);
360             File file(&f);
361             readFile(m->loc, &file);
362             // BUG: convert file contents to UTF-8 before use
363 
364             //printf("file: '%.*s'\n", file.len, file.buffer);
365             mbuf.write(file.buffer, file.len);
366         }
367     }
368     DocComment::parseMacros(&m->escapetable, &m->macrotable, (utf8_t *)mbuf.data, mbuf.offset);
369 
370     Scope *sc = Scope::createGlobal(m);      // create root scope
371 
372     DocComment *dc = DocComment::parse(m, m->comment);
373     dc->pmacrotable = &m->macrotable;
374     dc->pescapetable = &m->escapetable;
375     sc->lastdc = dc;
376 
377     // Generate predefined macros
378 
379     // Set the title to be the name of the module
380     {
381         const char *p = m->toPrettyChars();
382         Macro::define(&m->macrotable, (const utf8_t *)"TITLE", 5, (const utf8_t *)p, strlen(p));
383     }
384 
385     // Set time macros
386     {
387         time_t t;
388         time(&t);
389         char *p = ctime(&t);
390         p = mem.xstrdup(p);
391         Macro::define(&m->macrotable, (const utf8_t *)"DATETIME", 8, (const utf8_t *)p, strlen(p));
392         Macro::define(&m->macrotable, (const utf8_t *)"YEAR", 4, (const utf8_t *)p + 20, 4);
393     }
394 
395     const char *srcfilename = m->srcfile->toChars();
396     Macro::define(&m->macrotable, (const utf8_t *)"SRCFILENAME", 11, (const utf8_t *)srcfilename, strlen(srcfilename));
397 
398     const char *docfilename = m->docfile->toChars();
399     Macro::define(&m->macrotable, (const utf8_t *)"DOCFILENAME", 11, (const utf8_t *)docfilename, strlen(docfilename));
400 
401     if (dc->copyright)
402     {
403         dc->copyright->nooutput = 1;
404         Macro::define(&m->macrotable, (const utf8_t *)"COPYRIGHT", 9, dc->copyright->body, dc->copyright->bodylen);
405     }
406 
407     buf.printf("$(DDOC_COMMENT Generated by Ddoc from %s)\n", m->srcfile->toChars());
408     if (m->isDocFile)
409     {
410         Loc loc = m->md ? m->md->loc : m->loc;
411         size_t commentlen = strlen((const char *)m->comment);
412         Dsymbols a;
413         // Bugzilla 9764: Don't push m in a, to prevent emphasize ddoc file name.
414         if (dc->macros)
415         {
416             commentlen = dc->macros->name - m->comment;
417             dc->macros->write(loc, dc, sc, &a, &buf);
418         }
419         buf.write(m->comment, commentlen);
420         highlightText(sc, &a, &buf, 0);
421     }
422     else
423     {
424         Dsymbols a;
425         a.push(m);
426         dc->writeSections(sc, &a, &buf);
427         emitMemberComments(m, &buf, sc);
428     }
429 
430     //printf("BODY= '%.*s'\n", buf.offset, buf.data);
431     Macro::define(&m->macrotable, (const utf8_t *)"BODY", 4, (const utf8_t *)buf.data, buf.offset);
432 
433     OutBuffer buf2;
434     buf2.writestring("$(DDOC)\n");
435     size_t end = buf2.offset;
436     m->macrotable->expand(&buf2, 0, &end, NULL, 0);
437 
438     /* Remove all the escape sequences from buf2,
439      * and make CR-LF the newline.
440      */
441     {
442         buf.setsize(0);
443         buf.reserve(buf2.offset);
444         utf8_t *p = (utf8_t *)buf2.data;
445         for (size_t j = 0; j < buf2.offset; j++)
446         {
447             utf8_t c = p[j];
448             if (c == 0xFF && j + 1 < buf2.offset)
449             {
450                 j++;
451                 continue;
452             }
453             if (c == '\n')
454                 buf.writeByte('\r');
455             else if (c == '\r')
456             {
457                 buf.writestring("\r\n");
458                 if (j + 1 < buf2.offset && p[j + 1] == '\n')
459                 {
460                     j++;
461                 }
462                 continue;
463             }
464             buf.writeByte(c);
465         }
466     }
467 
468     // Transfer image to file
469     assert(m->docfile);
470     m->docfile->setbuffer(buf.data, buf.offset);
471     m->docfile->ref = 1;
472     ensurePathToNameExists(Loc(), m->docfile->toChars());
473     writeFile(m->loc, m->docfile);
474 }
475 
476 /****************************************************
477  * Having unmatched parentheses can hose the output of Ddoc,
478  * as the macros depend on properly nested parentheses.
479  * This function replaces all ( with $(LPAREN) and ) with $(RPAREN)
480  * to preserve text literally. This also means macros in the
481  * text won't be expanded.
482  */
escapeDdocString(OutBuffer * buf,size_t start)483 void escapeDdocString(OutBuffer *buf, size_t start)
484 {
485     for (size_t u = start; u < buf->offset; u++)
486     {
487         utf8_t c = buf->data[u];
488         switch(c)
489         {
490             case '$':
491                 buf->remove(u, 1);
492                 buf->insert(u, (const char *)"$(DOLLAR)", 9);
493                 u += 8;
494                 break;
495 
496             case '(':
497                 buf->remove(u, 1); //remove the (
498                 buf->insert(u, (const char *)"$(LPAREN)", 9); //insert this instead
499                 u += 8; //skip over newly inserted macro
500                 break;
501 
502             case ')':
503                 buf->remove(u, 1); //remove the )
504                 buf->insert(u, (const char *)"$(RPAREN)", 9); //insert this instead
505                 u += 8; //skip over newly inserted macro
506                 break;
507         }
508     }
509 }
510 
511 /****************************************************
512  * Having unmatched parentheses can hose the output of Ddoc,
513  * as the macros depend on properly nested parentheses.
514 
515  * Fix by replacing unmatched ( with $(LPAREN) and unmatched ) with $(RPAREN).
516  */
escapeStrayParenthesis(Loc loc,OutBuffer * buf,size_t start)517 void escapeStrayParenthesis(Loc loc, OutBuffer *buf, size_t start)
518 {
519     unsigned par_open = 0;
520 
521     for (size_t u = start; u < buf->offset; u++)
522     {
523         utf8_t c = buf->data[u];
524         switch(c)
525         {
526             case '(':
527                 par_open++;
528                 break;
529 
530             case ')':
531                 if (par_open == 0)
532                 {
533                     //stray ')'
534                     warning(loc, "Ddoc: Stray ')'. This may cause incorrect Ddoc output."
535                         " Use $(RPAREN) instead for unpaired right parentheses.");
536                     buf->remove(u, 1); //remove the )
537                     buf->insert(u, (const char *)"$(RPAREN)", 9); //insert this instead
538                     u += 8; //skip over newly inserted macro
539                 }
540                 else
541                     par_open--;
542                 break;
543         }
544     }
545 
546     if (par_open)                       // if any unmatched lparens
547     {
548         par_open = 0;
549         for (size_t u = buf->offset; u > start;)
550         {
551             u--;
552             utf8_t c = buf->data[u];
553             switch(c)
554             {
555                 case ')':
556                     par_open++;
557                     break;
558 
559                 case '(':
560                     if (par_open == 0)
561                     {
562                         //stray '('
563                         warning(loc, "Ddoc: Stray '('. This may cause incorrect Ddoc output."
564                             " Use $(LPAREN) instead for unpaired left parentheses.");
565                         buf->remove(u, 1); //remove the (
566                         buf->insert(u, (const char *)"$(LPAREN)", 9); //insert this instead
567                     }
568                     else
569                         par_open--;
570                     break;
571             }
572         }
573     }
574 }
575 
576 // Basically, this is to skip over things like private{} blocks in a struct or
577 // class definition that don't add any components to the qualified name.
skipNonQualScopes(Scope * sc)578 static Scope *skipNonQualScopes(Scope *sc)
579 {
580     while (sc && !sc->scopesym)
581         sc = sc->enclosing;
582     return sc;
583 }
584 
emitAnchorName(OutBuffer * buf,Dsymbol * s,Scope * sc)585 static bool emitAnchorName(OutBuffer *buf, Dsymbol *s, Scope *sc)
586 {
587     if (!s || s->isPackage() || s->isModule())
588         return false;
589 
590     // Add parent names first
591     bool dot = false;
592     if (s->parent)
593         dot = emitAnchorName(buf, s->parent, sc);
594     else if (sc)
595         dot = emitAnchorName(buf, sc->scopesym, skipNonQualScopes(sc->enclosing));
596 
597     // Eponymous template members can share the parent anchor name
598     if (getEponymousParent(s))
599         return dot;
600     if (dot)
601         buf->writeByte('.');
602 
603     // Use "this" not "__ctor"
604     TemplateDeclaration *td;
605     if (s->isCtorDeclaration() || ((td = s->isTemplateDeclaration()) != NULL &&
606         td->onemember && td->onemember->isCtorDeclaration()))
607     {
608         buf->writestring("this");
609     }
610     else
611     {
612         /* We just want the identifier, not overloads like TemplateDeclaration::toChars.
613          * We don't want the template parameter list and constraints. */
614         buf->writestring(s->Dsymbol::toChars());
615     }
616     return true;
617 }
618 
emitAnchor(OutBuffer * buf,Dsymbol * s,Scope * sc)619 static void emitAnchor(OutBuffer *buf, Dsymbol *s, Scope *sc)
620 {
621     Identifier *ident;
622     {
623         OutBuffer anc;
624         emitAnchorName(&anc, s, skipNonQualScopes(sc));
625         ident = Identifier::idPool(anc.peekString());
626     }
627     size_t *count = (size_t*)dmd_aaGet(&sc->anchorCounts, (void *)ident);
628     TemplateDeclaration *td = getEponymousParent(s);
629     // don't write an anchor for matching consecutive ditto symbols
630     if (*count > 0 && sc->prevAnchor == ident &&
631         sc->lastdc && (isDitto(s->comment) || (td && isDitto(td->comment))))
632         return;
633 
634     (*count)++;
635     // cache anchor name
636     sc->prevAnchor = ident;
637 
638     buf->writestring("$(DDOC_ANCHOR ");
639     buf->writestring(ident->toChars());
640     // only append count once there's a duplicate
641     if (*count != 1)
642         buf->printf(".%u", *count);
643     buf->writeByte(')');
644 }
645 
646 /******************************* emitComment **********************************/
647 
/** Get leading indentation from 'src' which represents lines of code.
 *
 * Leading CR/LF characters are skipped first, then the run of spaces and
 * tabs that follows is counted (each counts as one).
 * Returns 0 when 'src' is NULL.
 */
static size_t getCodeIndent(const char *src)
{
    if (!src)
        return 0;

    // skip until we find the first non-empty line
    const char *cursor = src;
    while (*cursor == '\r' || *cursor == '\n')
        cursor++;

    // the indent is the distance covered by the blanks that follow
    const char *lineStart = cursor;
    while (*cursor == ' ' || *cursor == '\t')
        cursor++;

    return (size_t)(cursor - lineStart);
}
662 
663 /** Recursively expand template mixin member docs into the scope. */
expandTemplateMixinComments(TemplateMixin * tm,OutBuffer * buf,Scope * sc)664 static void expandTemplateMixinComments(TemplateMixin *tm, OutBuffer *buf, Scope *sc)
665 {
666     if (!tm->semanticRun) tm->semantic(sc);
667     TemplateDeclaration *td = (tm && tm->tempdecl) ?
668         tm->tempdecl->isTemplateDeclaration() : NULL;
669     if (td && td->members)
670     {
671         for (size_t i = 0; i < td->members->dim; i++)
672         {
673             Dsymbol *sm = (*td->members)[i];
674             TemplateMixin *tmc = sm->isTemplateMixin();
675             if (tmc && tmc->comment)
676                 expandTemplateMixinComments(tmc, buf, sc);
677             else
678                 emitComment(sm, buf, sc);
679         }
680     }
681 }
682 
emitMemberComments(ScopeDsymbol * sds,OutBuffer * buf,Scope * sc)683 void emitMemberComments(ScopeDsymbol *sds, OutBuffer *buf, Scope *sc)
684 {
685     if (!sds->members)
686         return;
687 
688     //printf("ScopeDsymbol::emitMemberComments() %s\n", toChars());
689 
690     const char *m = "$(DDOC_MEMBERS ";
691     if (sds->isTemplateDeclaration())
692         m = "$(DDOC_TEMPLATE_MEMBERS ";
693     else if (sds->isClassDeclaration())
694         m = "$(DDOC_CLASS_MEMBERS ";
695     else if (sds->isStructDeclaration())
696         m = "$(DDOC_STRUCT_MEMBERS ";
697     else if (sds->isEnumDeclaration())
698         m = "$(DDOC_ENUM_MEMBERS ";
699     else if (sds->isModule())
700         m = "$(DDOC_MODULE_MEMBERS ";
701 
702     size_t offset1 = buf->offset;         // save starting offset
703     buf->writestring(m);
704     size_t offset2 = buf->offset;         // to see if we write anything
705 
706     sc = sc->push(sds);
707 
708     for (size_t i = 0; i < sds->members->dim; i++)
709     {
710         Dsymbol *s = (*sds->members)[i];
711         //printf("\ts = '%s'\n", s->toChars());
712 
713         // only expand if parent is a non-template (semantic won't work)
714         if (s->comment && s->isTemplateMixin() && s->parent && !s->parent->isTemplateDeclaration())
715             expandTemplateMixinComments((TemplateMixin *)s, buf, sc);
716 
717         emitComment(s, buf, sc);
718     }
719     emitComment(NULL, buf, sc);
720 
721     sc->pop();
722 
723     if (buf->offset == offset2)
724     {
725         /* Didn't write out any members, so back out last write
726          */
727         buf->offset = offset1;
728     }
729     else
730         buf->writestring(")\n");
731 }
732 
emitProtection(OutBuffer * buf,Prot prot)733 void emitProtection(OutBuffer *buf, Prot prot)
734 {
735     if (prot.kind != PROTundefined && prot.kind != PROTpublic)
736     {
737         protectionToBuffer(buf, prot);
738         buf->writeByte(' ');
739     }
740 }
741 
emitComment(Dsymbol * s,OutBuffer * buf,Scope * sc)742 void emitComment(Dsymbol *s, OutBuffer *buf, Scope *sc)
743 {
744     class EmitComment : public Visitor
745     {
746     public:
747         OutBuffer *buf;
748         Scope *sc;
749 
750         EmitComment(OutBuffer *buf, Scope *sc)
751             : buf(buf), sc(sc)
752         {
753         }
754 
755         void visit(Dsymbol *) {}
756         void visit(InvariantDeclaration *) {}
757         void visit(UnitTestDeclaration *) {}
758         void visit(PostBlitDeclaration *) {}
759         void visit(DtorDeclaration *) {}
760         void visit(StaticCtorDeclaration *) {}
761         void visit(StaticDtorDeclaration *) {}
762         void visit(TypeInfoDeclaration *) {}
763 
764         void emit(Scope *sc, Dsymbol *s, const utf8_t *com)
765         {
766             if (s && sc->lastdc && isDitto(com))
767             {
768                 sc->lastdc->a.push(s);
769                 return;
770             }
771 
772             // Put previous doc comment if exists
773             if (DocComment *dc = sc->lastdc)
774             {
775                 // Put the declaration signatures as the document 'title'
776                 buf->writestring(ddoc_decl_s);
777                 for (size_t i = 0; i < dc->a.dim; i++)
778                 {
779                     Dsymbol *sx = dc->a[i];
780 
781                     if (i == 0)
782                     {
783                         size_t o = buf->offset;
784                         toDocBuffer(sx, buf, sc);
785                         highlightCode(sc, sx, buf, o);
786                         continue;
787                     }
788 
789                     buf->writestring("$(DDOC_DITTO ");
790                     {
791                         size_t o = buf->offset;
792                         toDocBuffer(sx, buf, sc);
793                         highlightCode(sc, sx, buf, o);
794                     }
795                     buf->writeByte(')');
796                 }
797                 buf->writestring(ddoc_decl_e);
798 
799                 // Put the ddoc comment as the document 'description'
800                 buf->writestring(ddoc_decl_dd_s);
801                 {
802                     dc->writeSections(sc, &dc->a, buf);
803                     if (ScopeDsymbol *sds = dc->a[0]->isScopeDsymbol())
804                         emitMemberComments(sds, buf, sc);
805                 }
806                 buf->writestring(ddoc_decl_dd_e);
807                 //printf("buf.2 = [[%.*s]]\n", buf->offset - o0, buf->data + o0);
808             }
809 
810             if (s)
811             {
812                 DocComment *dc = DocComment::parse(s, com);
813                 dc->pmacrotable = &sc->_module->macrotable;
814                 sc->lastdc = dc;
815             }
816         }
817 
818         void visit(Declaration *d)
819         {
820             //printf("Declaration::emitComment(%p '%s'), comment = '%s'\n", d, d->toChars(), d->comment);
821             //printf("type = %p\n", d->type);
822             const utf8_t *com = d->comment;
823             if (TemplateDeclaration *td = getEponymousParent(d))
824             {
825                 if (isDitto(td->comment))
826                     com = td->comment;
827                 else
828                     com = Lexer::combineComments(td->comment, com);
829             }
830             else
831             {
832                 if (!d->ident)
833                     return;
834                 if (!d->type && !d->isCtorDeclaration() && !d->isAliasDeclaration())
835                     return;
836                 if (d->protection.kind == PROTprivate || sc->protection.kind == PROTprivate)
837                     return;
838             }
839             if (!com)
840                 return;
841 
842             emit(sc, d, com);
843         }
844 
845         void visit(AggregateDeclaration *ad)
846         {
847             //printf("AggregateDeclaration::emitComment() '%s'\n", ad->toChars());
848             const utf8_t *com = ad->comment;
849             if (TemplateDeclaration *td = getEponymousParent(ad))
850             {
851                 if (isDitto(td->comment))
852                     com = td->comment;
853                 else
854                     com = Lexer::combineComments(td->comment, com);
855             }
856             else
857             {
858                 if (ad->prot().kind == PROTprivate || sc->protection.kind == PROTprivate)
859                     return;
860                 if (!ad->comment)
861                     return;
862             }
863             if (!com)
864                 return;
865 
866             emit(sc, ad, com);
867         }
868 
869         void visit(TemplateDeclaration *td)
870         {
871             //printf("TemplateDeclaration::emitComment() '%s', kind = %s\n", td->toChars(), td->kind());
872             if (td->prot().kind == PROTprivate || sc->protection.kind == PROTprivate)
873                 return;
874             if (!td->comment)
875                 return;
876 
877             if (Dsymbol *ss = getEponymousMember(td))
878             {
879                 ss->accept(this);
880                 return;
881             }
882             emit(sc, td, td->comment);
883         }
884 
885         void visit(EnumDeclaration *ed)
886         {
887             if (ed->prot().kind == PROTprivate || sc->protection.kind == PROTprivate)
888                 return;
889             if (ed->isAnonymous() && ed->members)
890             {
891                 for (size_t i = 0; i < ed->members->dim; i++)
892                 {
893                     Dsymbol *s = (*ed->members)[i];
894                     emitComment(s, buf, sc);
895                 }
896                 return;
897             }
898             if (!ed->comment)
899                 return;
900             if (ed->isAnonymous())
901                 return;
902 
903             emit(sc, ed, ed->comment);
904         }
905 
906         void visit(EnumMember *em)
907         {
908             //printf("EnumMember::emitComment(%p '%s'), comment = '%s'\n", em, em->toChars(), em->comment);
909             if (em->prot().kind == PROTprivate || sc->protection.kind == PROTprivate)
910                 return;
911             if (!em->comment)
912                 return;
913 
914             emit(sc, em, em->comment);
915         }
916 
917         void visit(AttribDeclaration *ad)
918         {
919             //printf("AttribDeclaration::emitComment(sc = %p)\n", sc);
920 
921             /* A general problem with this, illustrated by BUGZILLA 2516,
922              * is that attributes are not transmitted through to the underlying
923              * member declarations for template bodies, because semantic analysis
924              * is not done for template declaration bodies
925              * (only template instantiations).
926              * Hence, Ddoc omits attributes from template members.
927              */
928 
929             Dsymbols *d = ad->include(NULL, NULL);
930 
931             if (d)
932             {
933                 for (size_t i = 0; i < d->dim; i++)
934                 {
935                     Dsymbol *s = (*d)[i];
936                     //printf("AttribDeclaration::emitComment %s\n", s->toChars());
937                     emitComment(s, buf, sc);
938                 }
939             }
940         }
941 
942         void visit(ProtDeclaration *pd)
943         {
944             if (pd->decl)
945             {
946                 Scope *scx = sc;
947                 sc = sc->copy();
948                 sc->protection = pd->protection;
949                 visit((AttribDeclaration *)pd);
950                 scx->lastdc = sc->lastdc;
951                 sc = sc->pop();
952             }
953         }
954 
955         void visit(ConditionalDeclaration *cd)
956         {
957             //printf("ConditionalDeclaration::emitComment(sc = %p)\n", sc);
958             if (cd->condition->inc)
959             {
960                 visit((AttribDeclaration *)cd);
961                 return;
962             }
963 
964             /* If generating doc comment, be careful because if we're inside
965              * a template, then include(NULL, NULL) will fail.
966              */
967             Dsymbols *d = cd->decl ? cd->decl : cd->elsedecl;
968             for (size_t i = 0; i < d->dim; i++)
969             {
970                 Dsymbol *s = (*d)[i];
971                 emitComment(s, buf, sc);
972             }
973         }
974     };
975 
976     EmitComment v(buf, sc);
977 
978     if (!s)
979         v.emit(sc, NULL, NULL);
980     else
981         s->accept(&v);
982 }
983 
984 /******************************* toDocBuffer **********************************/
985 
toDocBuffer(Dsymbol * s,OutBuffer * buf,Scope * sc)986 void toDocBuffer(Dsymbol *s, OutBuffer *buf, Scope *sc)
987 {
988     class ToDocBuffer : public Visitor
989     {
990     public:
991         OutBuffer *buf;
992         Scope *sc;
993 
994         ToDocBuffer(OutBuffer *buf, Scope *sc)
995             : buf(buf), sc(sc)
996         {
997         }
998 
999         void visit(Dsymbol *s)
1000         {
1001             //printf("Dsymbol::toDocbuffer() %s\n", s->toChars());
1002             HdrGenState hgs;
1003             hgs.ddoc = true;
1004             ::toCBuffer(s, buf, &hgs);
1005         }
1006 
1007         void prefix(Dsymbol *s)
1008         {
1009             if (s->isDeprecated())
1010                 buf->writestring("deprecated ");
1011 
1012             if (Declaration *d = s->isDeclaration())
1013             {
1014                 emitProtection(buf, d->protection);
1015 
1016                 if (d->isStatic())
1017                     buf->writestring("static ");
1018                 else if (d->isFinal())
1019                     buf->writestring("final ");
1020                 else if (d->isAbstract())
1021                     buf->writestring("abstract ");
1022 
1023                 if (!d->isFuncDeclaration())  // functionToBufferFull handles this
1024                 {
1025                     if (d->isConst())
1026                         buf->writestring("const ");
1027                     if (d->isImmutable())
1028                         buf->writestring("immutable ");
1029                     if (d->isSynchronized())
1030                         buf->writestring("synchronized ");
1031 
1032                     if (d->storage_class & STCmanifest)
1033                         buf->writestring("enum ");
1034                 }
1035             }
1036         }
1037 
1038         void visit(Declaration *d)
1039         {
1040             if (!d->ident)
1041                 return;
1042 
1043             TemplateDeclaration *td = getEponymousParent(d);
1044             //printf("Declaration::toDocbuffer() %s, originalType = %s, td = %s\n", d->toChars(), d->originalType ? d->originalType->toChars() : "--", td ? td->toChars() : "--");
1045 
1046             HdrGenState hgs;
1047             hgs.ddoc = true;
1048 
1049             if (d->isDeprecated())
1050                 buf->writestring("$(DEPRECATED ");
1051 
1052             prefix(d);
1053 
1054             if (d->type)
1055             {
1056                 Type *origType = d->originalType ? d->originalType : d->type;
1057                 if (origType->ty == Tfunction)
1058                 {
1059                     functionToBufferFull((TypeFunction *)origType, buf, d->ident, &hgs, td);
1060                 }
1061                 else
1062                     ::toCBuffer(origType, buf, d->ident, &hgs);
1063             }
1064             else
1065                 buf->writestring(d->ident->toChars());
1066 
1067             if (d->isVarDeclaration() && td)
1068             {
1069                 buf->writeByte('(');
1070                 if (td->origParameters && td->origParameters->dim)
1071                 {
1072                     for (size_t i = 0; i < td->origParameters->dim; i++)
1073                     {
1074                         if (i)
1075                             buf->writestring(", ");
1076                         toCBuffer((*td->origParameters)[i], buf, &hgs);
1077                     }
1078                 }
1079                 buf->writeByte(')');
1080             }
1081 
1082             // emit constraints if declaration is a templated declaration
1083             if (td && td->constraint)
1084             {
1085                 buf->writestring(" if (");
1086                 ::toCBuffer(td->constraint, buf, &hgs);
1087                 buf->writeByte(')');
1088             }
1089 
1090             if (d->isDeprecated())
1091                 buf->writestring(")");
1092 
1093             buf->writestring(";\n");
1094         }
1095 
1096         void visit(AliasDeclaration *ad)
1097         {
1098             //printf("AliasDeclaration::toDocbuffer() %s\n", ad->toChars());
1099             if (!ad->ident)
1100                 return;
1101 
1102             if (ad->isDeprecated())
1103                 buf->writestring("deprecated ");
1104 
1105             emitProtection(buf, ad->protection);
1106             buf->printf("alias %s = ", ad->toChars());
1107 
1108             if (Dsymbol *s = ad->aliassym)  // ident alias
1109             {
1110                 prettyPrintDsymbol(s, ad->parent);
1111             }
1112             else if (Type *type = ad->getType())  // type alias
1113             {
1114                 if (type->ty == Tclass || type->ty == Tstruct || type->ty == Tenum)
1115                 {
1116                     if (Dsymbol *s = type->toDsymbol(NULL))  // elaborate type
1117                         prettyPrintDsymbol(s, ad->parent);
1118                     else
1119                         buf->writestring(type->toChars());
1120                 }
1121                 else
1122                 {
1123                     // simple type
1124                     buf->writestring(type->toChars());
1125                 }
1126             }
1127 
1128             buf->writestring(";\n");
1129         }
1130 
1131         void parentToBuffer(Dsymbol *s)
1132         {
1133             if (s && !s->isPackage() && !s->isModule())
1134             {
1135                 parentToBuffer(s->parent);
1136                 buf->writestring(s->toChars());
1137                 buf->writestring(".");
1138             }
1139         }
1140 
1141         static bool inSameModule(Dsymbol *s, Dsymbol *p)
1142         {
1143             for ( ; s ; s = s->parent)
1144             {
1145                 if (s->isModule())
1146                     break;
1147             }
1148 
1149             for ( ; p ; p = p->parent)
1150             {
1151                 if (p->isModule())
1152                     break;
1153             }
1154 
1155             return s == p;
1156         }
1157 
1158         void prettyPrintDsymbol(Dsymbol *s, Dsymbol *parent)
1159         {
1160             if (s->parent && (s->parent == parent))  // in current scope -> naked name
1161             {
1162                 buf->writestring(s->toChars());
1163             }
1164             else if (!inSameModule(s, parent))  // in another module -> full name
1165             {
1166                 buf->writestring(s->toPrettyChars());
1167             }
1168             else  // nested in a type in this module -> full name w/o module name
1169             {
1170                 // if alias is nested in a user-type use module-scope lookup
1171                 if (!parent->isModule() && !parent->isPackage())
1172                     buf->writestring(".");
1173 
1174                 parentToBuffer(s->parent);
1175                 buf->writestring(s->toChars());
1176             }
1177         }
1178 
1179         void visit(AggregateDeclaration *ad)
1180         {
1181             if (!ad->ident)
1182                 return;
1183 
1184             buf->printf("%s %s", ad->kind(), ad->toChars());
1185             buf->writestring(";\n");
1186         }
1187 
1188         void visit(StructDeclaration *sd)
1189         {
1190             //printf("StructDeclaration::toDocbuffer() %s\n", sd->toChars());
1191             if (!sd->ident)
1192                 return;
1193 
1194             if (TemplateDeclaration *td = getEponymousParent(sd))
1195             {
1196                 toDocBuffer(td, buf, sc);
1197             }
1198             else
1199             {
1200                 buf->printf("%s %s", sd->kind(), sd->toChars());
1201             }
1202             buf->writestring(";\n");
1203         }
1204 
1205         void visit(ClassDeclaration *cd)
1206         {
1207             //printf("ClassDeclaration::toDocbuffer() %s\n", cd->toChars());
1208             if (!cd->ident)
1209                 return;
1210 
1211             if (TemplateDeclaration *td = getEponymousParent(cd))
1212             {
1213                 toDocBuffer(td, buf, sc);
1214             }
1215             else
1216             {
1217                 if (!cd->isInterfaceDeclaration() && cd->isAbstract())
1218                     buf->writestring("abstract ");
1219                 buf->printf("%s %s", cd->kind(), cd->toChars());
1220             }
1221             int any = 0;
1222             for (size_t i = 0; i < cd->baseclasses->dim; i++)
1223             {
1224                 BaseClass *bc = (*cd->baseclasses)[i];
1225 
1226                 if (bc->sym && bc->sym->ident == Id::Object)
1227                     continue;
1228 
1229                 if (any)
1230                     buf->writestring(", ");
1231                 else
1232                 {
1233                     buf->writestring(": ");
1234                     any = 1;
1235                 }
1236                 emitProtection(buf, Prot(PROTpublic));
1237                 if (bc->sym)
1238                 {
1239                     buf->printf("$(DDOC_PSUPER_SYMBOL %s)", bc->sym->toPrettyChars());
1240                 }
1241                 else
1242                 {
1243                     HdrGenState hgs;
1244                     ::toCBuffer(bc->type, buf, NULL, &hgs);
1245                 }
1246             }
1247             buf->writestring(";\n");
1248         }
1249 
1250         void visit(EnumDeclaration *ed)
1251         {
1252             if (!ed->ident)
1253                 return;
1254 
1255             buf->printf("%s %s", ed->kind(), ed->toChars());
1256             if (ed->memtype)
1257             {
1258                 buf->writestring(": $(DDOC_ENUM_BASETYPE ");
1259                 HdrGenState hgs;
1260                 ::toCBuffer(ed->memtype, buf, NULL, &hgs);
1261                 buf->writestring(")");
1262             }
1263             buf->writestring(";\n");
1264         }
1265 
1266         void visit(EnumMember *em)
1267         {
1268             if (!em->ident)
1269                 return;
1270 
1271             buf->writestring(em->toChars());
1272         }
1273     };
1274 
1275     ToDocBuffer v(buf, sc);
1276     s->accept(&v);
1277 }
1278 
1279 /********************************* DocComment *********************************/
1280 
parse(Dsymbol * s,const utf8_t * comment)1281 DocComment *DocComment::parse(Dsymbol *s, const utf8_t *comment)
1282 {
1283     //printf("parse(%s): '%s'\n", s->toChars(), comment);
1284     DocComment *dc = new DocComment();
1285     dc->a.push(s);
1286     if (!comment)
1287         return dc;
1288 
1289     dc->parseSections(comment);
1290 
1291     for (size_t i = 0; i < dc->sections.dim; i++)
1292     {
1293         Section *sec = dc->sections[i];
1294 
1295         if (icmp("copyright", sec->name, sec->namelen) == 0)
1296         {
1297             dc->copyright = sec;
1298         }
1299         if (icmp("macros", sec->name, sec->namelen) == 0)
1300         {
1301             dc->macros = sec;
1302         }
1303     }
1304 
1305     return dc;
1306 }
1307 
/*****************************************
 * Split the NUL-terminated doc comment text into sections and append a
 * Section object for each one to `sections`.
 *
 * A section ends at one of:
 *      an 'Identifier:' tag outside a code block, where the identifier
 *        starts with an upper case letter (isupper) and the ':' is
 *        followed by whitespace or the end of the text;
 *      two consecutive '\n' characters, but only while no summary has been
 *        recorded yet and the current section is unnamed;
 *      the terminating '\0'.
 * A tag becomes the name of the section that follows it.  Sections named
 * "Params" and "Macros" are created as ParamSection and MacroSection.
 * The first unnamed section is recorded in `summary`.
 * Section names and bodies point into `comment`; nothing is copied.
 */

void DocComment::parseSections(const utf8_t *comment)
{
    const utf8_t *p;                    // current scan position
    const utf8_t *pstart;               // start of the current section body
    const utf8_t *pend;                 // end (exclusive) of the current section body
    const utf8_t *idstart = NULL;       // dead-store to prevent spurious warning
    size_t idlen;                       // length of the tag that ended this section, 0 if none

    // Name of the section currently being scanned (set by the previous tag).
    const utf8_t *name = NULL;
    size_t namelen = 0;

    //printf("parseSections('%s')\n", comment);
    p = comment;
    while (*p)
    {
        // Remember where the whitespace before the body began so that the
        // indentation can be restored if the body starts with a code block.
        const utf8_t *pstart0 = p;
        p = skipwhitespace(p);
        pstart = p;
        pend = p;

        /* Find end of section, which is ended by one of:
         *      'identifier:' (but not inside a code section)
         *      '\0'
         */
        idlen = 0;
        int inCode = 0;                 // toggled by each line of 3 or more dashes
        while (1)
        {
            // Check for start/end of a code section
            if (*p == '-')
            {
                if (!inCode)
                {
                    // restore leading indentation
                    while (pstart0 < pstart && isIndentWS(pstart-1)) --pstart;
                }

                int numdash = 0;
                while (*p == '-')
                {
                    ++numdash;
                    p++;
                }
                // BUG: handle UTF PS and LS too
                // Only a run of at least 3 dashes that reaches the end of the
                // line (or of the text) toggles the code state.
                if ((!*p || *p == '\r' || *p == '\n') && numdash >= 3)
                    inCode ^= 1;
                pend = p;
            }

            if (!inCode && isIdStart(p))
            {
                // Scan over the identifier; q ends up on the character after it.
                const utf8_t *q = p + utfStride(p);
                while (isIdTail(q))
                    q += utfStride(q);
                // Detected tag ends it
                if (*q == ':' && isupper(*p)
                        && (isspace(q[1]) || q[1] == 0))
                {
                    idlen = q - p;
                    idstart = p;
                    // Rewind pend to the start of the line holding the tag
                    // (or to pstart), so the tag's line is not part of the body.
                    for (pend = p; pend > pstart; pend--)
                    {
                        if (pend[-1] == '\n')
                            break;
                    }
                    p = q + 1;          // continue after the ':'
                    break;
                }
            }
            // Advance to the start of the next line, extending pend over
            // every character consumed except the final '\n'.
            while (1)
            {
                if (!*p)
                    goto L1;
                if (*p == '\n')
                {
                    p++;
                    // An empty line ends the section only while the summary
                    // is still unset, the section is unnamed and we are not
                    // inside a code block.
                    if (*p == '\n' && !summary && !namelen && !inCode)
                    {
                        pend = p;
                        p++;
                        goto L1;
                    }
                    break;
                }
                p++;
                pend = p;
            }
            p = skipwhitespace(p);
        }
      L1:

        // Record the section if it is named or has a non-empty body.
        if (namelen || pstart < pend)
        {
            Section *s;
            if (icmp("Params", name, namelen) == 0)
                s = new ParamSection();
            else if (icmp("Macros", name, namelen) == 0)
                s = new MacroSection();
            else
                s = new Section();
            s->name = name;
            s->namelen = namelen;
            s->body = pstart;
            s->bodylen = pend - pstart;
            s->nooutput = 0;

            //printf("Section: '%.*s' = '%.*s'\n", s->namelen, s->name, s->bodylen, s->body);

            sections.push(s);

            // The first unnamed section becomes the summary.
            if (!summary && !namelen)
                summary = s;
        }

        if (idlen)
        {
            // The tag just found names the next section.
            name = idstart;
            namelen = idlen;
        }
        else
        {
            // No tag: the next section (if any) is unnamed.
            name = NULL;
            namelen = 0;
            if (!*p)
                break;
        }
    }
}
1444 
writeSections(Scope * sc,Dsymbols * a,OutBuffer * buf)1445 void DocComment::writeSections(Scope *sc, Dsymbols *a, OutBuffer *buf)
1446 {
1447     assert(a->dim);
1448 
1449     //printf("DocComment::writeSections()\n");
1450     Loc loc = (*a)[0]->loc;
1451     if (Module *m = (*a)[0]->isModule())
1452     {
1453         if (m->md)
1454             loc = m->md->loc;
1455     }
1456 
1457     size_t offset1 = buf->offset;
1458     buf->writestring("$(DDOC_SECTIONS ");
1459     size_t offset2 = buf->offset;
1460 
1461     for (size_t i = 0; i < sections.dim; i++)
1462     {
1463         Section *sec = sections[i];
1464         if (sec->nooutput)
1465             continue;
1466 
1467         //printf("Section: '%.*s' = '%.*s'\n", sec->namelen, sec->name, sec->bodylen, sec->body);
1468         if (!sec->namelen && i == 0)
1469         {
1470             buf->writestring("$(DDOC_SUMMARY ");
1471             size_t o = buf->offset;
1472             buf->write(sec->body, sec->bodylen);
1473             escapeStrayParenthesis(loc, buf, o);
1474             highlightText(sc, a, buf, o);
1475             buf->writestring(")\n");
1476         }
1477         else
1478             sec->write(loc, this, sc, a, buf);
1479     }
1480 
1481     for (size_t i = 0; i < a->dim; i++)
1482     {
1483         Dsymbol *s = (*a)[i];
1484         if (Dsymbol *td = getEponymousParent(s))
1485             s = td;
1486 
1487         for (UnitTestDeclaration *utd = s->ddocUnittest; utd; utd = utd->ddocUnittest)
1488         {
1489             if (utd->protection.kind == PROTprivate || !utd->comment || !utd->fbody)
1490                 continue;
1491 
1492             // Strip whitespaces to avoid showing empty summary
1493             const utf8_t *c = utd->comment;
1494             while (*c == ' ' || *c == '\t' || *c == '\n' || *c == '\r') ++c;
1495 
1496             buf->writestring("$(DDOC_EXAMPLES ");
1497 
1498             size_t o = buf->offset;
1499             buf->writestring((const char *)c);
1500 
1501             if (utd->codedoc)
1502             {
1503                 size_t n = getCodeIndent(utd->codedoc);
1504                 while (n--) buf->writeByte(' ');
1505                 buf->writestring("----\n");
1506                 buf->writestring(utd->codedoc);
1507                 buf->writestring("----\n");
1508                 highlightText(sc, a, buf, o);
1509             }
1510 
1511             buf->writestring(")");
1512         }
1513     }
1514 
1515     if (buf->offset == offset2)
1516     {
1517         /* Didn't write out any sections, so back out last write
1518          */
1519         buf->offset = offset1;
1520         buf->writestring("$(DDOC_BLANKLINE)\n");
1521     }
1522     else
1523         buf->writestring(")\n");
1524 }
1525 
1526 /***************************************************
1527  */
1528 
write(Loc loc,DocComment *,Scope * sc,Dsymbols * a,OutBuffer * buf)1529 void Section::write(Loc loc, DocComment *, Scope *sc, Dsymbols *a, OutBuffer *buf)
1530 {
1531     assert(a->dim);
1532 
1533     if (namelen)
1534     {
1535         static const char *table[] =
1536         {
1537             "AUTHORS", "BUGS", "COPYRIGHT", "DATE",
1538             "DEPRECATED", "EXAMPLES", "HISTORY", "LICENSE",
1539             "RETURNS", "SEE_ALSO", "STANDARDS", "THROWS",
1540             "VERSION", NULL
1541         };
1542 
1543         for (size_t i = 0; table[i]; i++)
1544         {
1545             if (icmp(table[i], name, namelen) == 0)
1546             {
1547                 buf->printf("$(DDOC_%s ", table[i]);
1548                 goto L1;
1549             }
1550         }
1551 
1552         buf->writestring("$(DDOC_SECTION ");
1553 
1554             // Replace _ characters with spaces
1555             buf->writestring("$(DDOC_SECTION_H ");
1556             size_t o = buf->offset;
1557             for (size_t u = 0; u < namelen; u++)
1558             {
1559                 utf8_t c = name[u];
1560                 buf->writeByte((c == '_') ? ' ' : c);
1561             }
1562             escapeStrayParenthesis(loc, buf, o);
1563             buf->writestring(":)\n");
1564     }
1565     else
1566     {
1567         buf->writestring("$(DDOC_DESCRIPTION ");
1568     }
1569   L1:
1570     size_t o = buf->offset;
1571     buf->write(body, bodylen);
1572     escapeStrayParenthesis(loc, buf, o);
1573     highlightText(sc, a, buf, o);
1574     buf->writestring(")\n");
1575 }
1576 
/***************************************************
 * Write a parameter section as $(DDOC_PARAMS ...).
 *
 * The body is scanned line by line for entries of the form
 *      name = description
 * A line that does not start a new entry extends the description of the
 * pending entry, or is skipped when no entry is pending.
 * Each entry is written as
 *      $(DDOC_PARAM_ROW $(DDOC_PARAM_ID ...) $(DDOC_PARAM_DESC ...))
 * A warning is issued for a name that matches neither a function parameter
 * nor a template parameter, and - when `a` holds exactly one symbol for
 * which isTypeFunction() succeeds - for a mismatch between the number of
 * documented and declared parameters.
 *
 * NOTE(review): apart from the outer `p < pend` test, the scanning loops
 * stop only on '\n' (or a non-matching byte); this appears to rely on the
 * body containing a terminating '\n' - confirm against the producer of
 * section bodies.
 */

void ParamSection::write(Loc loc, DocComment *, Scope *sc, Dsymbols *a, OutBuffer *buf)
{
    assert(a->dim);
    Dsymbol *s = (*a)[0];   // test

    const utf8_t *p = body;
    size_t len = bodylen;
    const utf8_t *pend = p + len;

    // Candidate name on the current line, before it is known to be an entry.
    const utf8_t *tempstart = NULL;
    size_t templen = 0;

    // Pending entry whose description may still be extended.
    const utf8_t *namestart = NULL;
    size_t namelen = 0;       // !=0 if line continuation

    const utf8_t *textstart = NULL;
    size_t textlen = 0;

    // Number of entries written, excluding template parameters.
    size_t paramcount = 0;

    buf->writestring("$(DDOC_PARAMS ");
    while (p < pend)
    {
        // Skip to start of macro
        while (1)
        {
            switch (*p)
            {
                case ' ':
                case '\t':
                    p++;
                    continue;

                case '\n':
                    // Empty line: nothing to do, move on to the next line.
                    p++;
                    goto Lcont;

                default:
                    if (isIdStart(p) || isCVariadicArg(p, pend - p))
                        break;
                    if (namelen)
                        goto Ltext;             // continuation of prev macro
                    goto Lskipline;
            }
            break;
        }
        tempstart = p;

        while (isIdTail(p))
            p += utfStride(p);
        // The C variadic marker is 3 bytes long.
        if (isCVariadicArg(p, pend - p))
            p += 3;

        templen = p - tempstart;

        while (*p == ' ' || *p == '\t')
            p++;

        // Without '=' this line is not a new entry.
        if (*p != '=')
        {
            if (namelen)
                goto Ltext;             // continuation of prev macro
            goto Lskipline;
        }
        p++;

        if (namelen)
        {
            // Output existing param

        // Also entered by the `goto L1` after the main loop to flush the
        // last entry; in that case p >= pend and the break below leaves
        // the loop again.
        L1:
            //printf("param '%.*s' = '%.*s'\n", namelen, namestart, textlen, textstart);
            ++paramcount;
            HdrGenState hgs;
            buf->writestring("$(DDOC_PARAM_ROW ");
            {
                buf->writestring("$(DDOC_PARAM_ID ");
                {
                    size_t o = buf->offset;
                    Parameter *fparam = isFunctionParameter(a, namestart, namelen);
                    if (!fparam)
                    {
                        // Comments on a template might refer to function parameters within.
                        // Search the parameters of nested eponymous functions (with the same name.)
                        fparam = isEponymousFunctionParameter(a, namestart, namelen);
                    }
                    bool isCVariadic = isCVariadicParameter(a, namestart, namelen);
                    if (isCVariadic)
                    {
                        buf->writestring("...");
                    }
                    else if (fparam && fparam->type && fparam->ident)
                    {
                        // Known function parameter: show its type and name.
                        ::toCBuffer(fparam->type, buf, fparam->ident, &hgs);
                    }
                    else
                    {
                        if (isTemplateParameter(a, namestart, namelen))
                        {
                            // 10236: Don't count template parameters for params check
                            --paramcount;
                        }
                        else if (!fparam)
                        {
                            warning(s->loc, "Ddoc: function declaration has no parameter '%.*s'", (int)namelen, namestart);
                        }
                        // Fall back to the name exactly as written in the comment.
                        buf->write(namestart, namelen);
                    }
                    escapeStrayParenthesis(loc, buf, o);
                    highlightCode(sc, a, buf, o);
                }
                buf->writestring(")\n");

                buf->writestring("$(DDOC_PARAM_DESC ");
                {
                    size_t o = buf->offset;
                    buf->write(textstart, textlen);
                    escapeStrayParenthesis(loc, buf, o);
                    highlightText(sc, a, buf, o);
                }
                buf->writestring(")");
            }
            buf->writestring(")\n");
            namelen = 0;
            if (p >= pend)
                break;
        }

        // The candidate becomes the pending entry.
        namestart = tempstart;
        namelen = templen;

        while (*p == ' ' || *p == '\t')
            p++;
        textstart = p;

      Ltext:
        // Extend the description to the end of the current line; for a
        // continuation line textstart still refers to the entry's first line.
        while (*p != '\n')
            p++;
        textlen = p - textstart;
        p++;

     Lcont:
        continue;

     Lskipline:
        // Ignore this line
        while (*p++ != '\n')
            ;
    }
    if (namelen)
        goto L1;                // write out last one
    buf->writestring(")\n");

    // The count check is only done when exactly one symbol is documented.
    TypeFunction *tf = a->dim == 1 ? isTypeFunction(s) : NULL;
    if (tf)
    {
        // Declared parameters, plus one when tf->varargs == 1.
        size_t pcount = (tf->parameters ? tf->parameters->dim : 0) + (int)(tf->varargs == 1);
        if (pcount != paramcount)
        {
            warning(s->loc, "Ddoc: parameter count mismatch");
        }
    }
}
1743 
1744 /***************************************************
1745  */
1746 
write(Loc,DocComment * dc,Scope *,Dsymbols *,OutBuffer *)1747 void MacroSection::write(Loc, DocComment *dc, Scope *, Dsymbols *, OutBuffer *)
1748 {
1749     //printf("MacroSection::write()\n");
1750     DocComment::parseMacros(dc->pescapetable, dc->pmacrotable, body, bodylen);
1751 }
1752 
/************************************************
 * Parse macros out of Macros: section.
 * Macros are of the form:
 *      name1 = value1
 *
 *      name2 = value2
 *
 * The text is m[0 .. mlen].  A line that does not start a new
 * 'name =' definition extends the value of the pending macro, or is
 * skipped when no macro is pending.
 * A macro named ESCAPES (compared with icmp) is passed to parseEscapes()
 * with pescapetable; every other macro is passed to Macro::define() with
 * pmacrotable.
 */

void DocComment::parseMacros(Escape **pescapetable, Macro **pmacrotable, const utf8_t *m, size_t mlen)
{
    const utf8_t *p = m;
    size_t len = mlen;
    const utf8_t *pend = p + len;

    // Candidate name on the current line, before it is known to be a definition.
    const utf8_t *tempstart = NULL;
    size_t templen = 0;

    // Pending macro whose value may still be extended.
    const utf8_t *namestart = NULL;
    size_t namelen = 0;       // !=0 if line continuation

    const utf8_t *textstart = NULL;
    size_t textlen = 0;

    while (p < pend)
    {
        // Skip to start of macro
        while (1)
        {
            if (p >= pend)
                goto Ldone;
            switch (*p)
            {
                case ' ':
                case '\t':
                    p++;
                    continue;

                case '\r':
                case '\n':
                    // Empty line: move on to the next line.
                    p++;
                    goto Lcont;

                default:
                    if (isIdStart(p))
                        break;
                    if (namelen)
                        goto Ltext;             // continuation of prev macro
                    goto Lskipline;
            }
            break;
        }
        tempstart = p;

        // Scan over the identifier.
        while (1)
        {
            if (p >= pend)
                goto Ldone;
            if (!isIdTail(p))
                break;
            p += utfStride(p);
        }
        templen = p - tempstart;

        // Skip blanks between the identifier and '='.
        while (1)
        {
            if (p >= pend)
                goto Ldone;
            if (!(*p == ' ' || *p == '\t'))
                break;
            p++;
        }

        // Without '=' this line is not a new definition.
        if (*p != '=')
        {
            if (namelen)
                goto Ltext;             // continuation of prev macro
            goto Lskipline;
        }
        p++;
        if (p >= pend)
            goto Ldone;

        if (namelen)
        {
            // Output existing macro
        // Also entered from Ldone to flush the last pending macro; in that
        // case namelen is reset and, with p >= pend, the break leaves the loop.
        L1:
            //printf("macro '%.*s' = '%.*s'\n", namelen, namestart, textlen, textstart);
            if (icmp("ESCAPES", namestart, namelen) == 0)
                parseEscapes(pescapetable, textstart, textlen);
            else
                Macro::define(pmacrotable, namestart, namelen, textstart, textlen);
            namelen = 0;
            if (p >= pend)
                break;
        }

        // The candidate becomes the pending macro.
        namestart = tempstart;
        namelen = templen;

        while (p < pend && (*p == ' ' || *p == '\t'))
            p++;
        textstart = p;

      Ltext:
        // Extend the value to the end of the current line; for a
        // continuation line textstart still refers to the macro's first line.
        while (p < pend && *p != '\r' && *p != '\n')
            p++;
        textlen = p - textstart;

        // Step over the line terminator (one byte).
        p++;
        //printf("p = %p, pend = %p\n", p, pend);

     Lcont:
        continue;

     Lskipline:
        // Ignore this line
        // (the terminator itself is consumed by the next iteration)
        while (p < pend && *p != '\r' && *p != '\n')
            p++;
    }
Ldone:
    if (namelen)
        goto L1;                // write out last one
}
1876 
1877 /**************************************
1878  * Parse escapes of the form:
1879  *      /c/string/
1880  * where c is a single character.
1881  * Multiple escapes can be separated
1882  * by whitespace and/or commas.
1883  */
1884 
parseEscapes(Escape ** pescapetable,const utf8_t * textstart,size_t textlen)1885 void DocComment::parseEscapes(Escape **pescapetable, const utf8_t *textstart, size_t textlen)
1886 {
1887     Escape *escapetable = *pescapetable;
1888 
1889     if (!escapetable)
1890     {
1891         escapetable = new Escape;
1892         memset(escapetable, 0, sizeof(Escape));
1893         *pescapetable = escapetable;
1894     }
1895     //printf("parseEscapes('%.*s') pescapetable = %p\n", textlen, textstart, pescapetable);
1896     const utf8_t *p = textstart;
1897     const utf8_t *pend = p + textlen;
1898 
1899     while (1)
1900     {
1901         while (1)
1902         {
1903             if (p + 4 >= pend)
1904                 return;
1905             if (!(*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n' || *p == ','))
1906                 break;
1907             p++;
1908         }
1909         if (p[0] != '/' || p[2] != '/')
1910             return;
1911         utf8_t c = p[1];
1912         p += 3;
1913         const utf8_t *start = p;
1914         while (1)
1915         {
1916             if (p >= pend)
1917                 return;
1918             if (*p == '/')
1919                 break;
1920             p++;
1921         }
1922         size_t len = p - start;
1923         char *s = (char *)memcpy(mem.xmalloc(len + 1), start, len);
1924         s[len] = 0;
1925         escapetable->strings[c] = s;
1926         //printf("\t%c = '%s'\n", c, s);
1927         p++;
1928     }
1929 }
1930 
1931 
/******************************************
 * Compare 0-terminated string with length terminated string.
 *
 * Params:
 *      stringz = 0-terminated string
 *      s       = start of the length terminated string
 *      slen    = number of bytes at s
 *
 * Returns:
 *      < 0, == 0, > 0. Strings of different length never compare equal;
 *      the shorter one orders first. Equal-length strings are ordered
 *      by memcmp().
 */

int cmp(const char *stringz, const void *s, size_t slen)
{
    const size_t len1 = strlen(stringz);

    /* Compare the lengths directly. Narrowing the size_t difference to int
     * could wrap to 0 (reporting unequal strings as equal) or yield the
     * wrong sign when the lengths are far apart.
     */
    if (len1 != slen)
        return len1 < slen ? -1 : 1;
    return memcmp(stringz, s, slen);
}
1945 
icmp(const char * stringz,const void * s,size_t slen)1946 int icmp(const char *stringz, const void *s, size_t slen)
1947 {
1948     size_t len1 = strlen(stringz);
1949 
1950     if (len1 != slen)
1951         return (int)(len1 - slen);
1952     return Port::memicmp(stringz, (const char *)s, slen);
1953 }
1954 
1955 /*****************************************
1956  * Return true if comment consists entirely of "ditto".
1957  */
1958 
isDitto(const utf8_t * comment)1959 bool isDitto(const utf8_t *comment)
1960 {
1961     if (comment)
1962     {
1963         const utf8_t *p = skipwhitespace(comment);
1964 
1965         if (Port::memicmp((const char *)p, "ditto", 5) == 0 && *skipwhitespace(p + 5) == 0)
1966             return true;
1967     }
1968     return false;
1969 }
1970 
1971 /**********************************************
1972  * Skip white space.
1973  */
1974 
skipwhitespace(const utf8_t * p)1975 const utf8_t *skipwhitespace(const utf8_t *p)
1976 {
1977     for (; 1; p++)
1978     {
1979         switch (*p)
1980         {
1981             case ' ':
1982             case '\t':
1983             case '\n':
1984                 continue;
1985         }
1986         break;
1987     }
1988     return p;
1989 }
1990 
1991 
1992 /************************************************
1993  * Scan forward to one of:
1994  *      start of identifier
1995  *      beginning of next line
1996  *      end of buf
1997  */
1998 
skiptoident(OutBuffer * buf,size_t i)1999 size_t skiptoident(OutBuffer *buf, size_t i)
2000 {
2001     while (i < buf->offset)
2002     {
2003         dchar_t c;
2004 
2005         size_t oi = i;
2006         if (utf_decodeChar((utf8_t *)buf->data, buf->offset, &i, &c))
2007         {
2008             /* Ignore UTF errors, but still consume input
2009              */
2010             break;
2011         }
2012         if (c >= 0x80)
2013         {
2014             if (!isUniAlpha(c))
2015                 continue;
2016         }
2017         else if (!(isalpha(c) || c == '_' || c == '\n'))
2018             continue;
2019         i = oi;
2020         break;
2021     }
2022     return i;
2023 }
2024 
2025 /************************************************
2026  * Scan forward past end of identifier.
2027  */
2028 
skippastident(OutBuffer * buf,size_t i)2029 size_t skippastident(OutBuffer *buf, size_t i)
2030 {
2031     while (i < buf->offset)
2032     {
2033         dchar_t c;
2034 
2035         size_t oi = i;
2036         if (utf_decodeChar((utf8_t *)buf->data, buf->offset, &i, &c))
2037         {
2038             /* Ignore UTF errors, but still consume input
2039              */
2040             break;
2041         }
2042         if (c >= 0x80)
2043         {
2044             if (isUniAlpha(c))
2045                 continue;
2046         }
2047         else if (isalnum(c) || c == '_')
2048             continue;
2049         i = oi;
2050         break;
2051     }
2052     return i;
2053 }
2054 
2055 
2056 /************************************************
2057  * Scan forward past URL starting at i.
2058  * We don't want to highlight parts of a URL.
2059  * Returns:
2060  *      i if not a URL
2061  *      index just past it if it is a URL
2062  */
2063 
skippastURL(OutBuffer * buf,size_t i)2064 size_t skippastURL(OutBuffer *buf, size_t i)
2065 {
2066     size_t length = buf->offset - i;
2067     utf8_t *p = (utf8_t *)&buf->data[i];
2068     size_t j;
2069     unsigned sawdot = 0;
2070 
2071     if (length > 7 && Port::memicmp((char *)p, "http://", 7) == 0)
2072     {
2073         j = 7;
2074     }
2075     else if (length > 8 && Port::memicmp((char *)p, "https://", 8) == 0)
2076     {
2077         j = 8;
2078     }
2079     else
2080         goto Lno;
2081 
2082     for (; j < length; j++)
2083     {
2084         utf8_t c = p[j];
2085         if (isalnum(c))
2086             continue;
2087         if (c == '-' || c == '_' || c == '?' ||
2088             c == '=' || c == '%' || c == '&' ||
2089             c == '/' || c == '+' || c == '#' ||
2090             c == '~')
2091             continue;
2092         if (c == '.')
2093         {
2094             sawdot = 1;
2095             continue;
2096         }
2097         break;
2098     }
2099     if (sawdot)
2100         return i + j;
2101 
2102 Lno:
2103     return i;
2104 }
2105 
2106 
2107 /****************************************************
2108  */
2109 
isIdentifier(Dsymbols * a,const utf8_t * p,size_t len)2110 bool isIdentifier(Dsymbols *a, const utf8_t *p, size_t len)
2111 {
2112     for (size_t i = 0; i < a->dim; i++)
2113     {
2114         const char *s = (*a)[i]->ident->toChars();
2115         if (cmp(s, p, len) == 0)
2116             return true;
2117     }
2118     return false;
2119 }
2120 
2121 /****************************************************
2122  */
2123 
isKeyword(utf8_t * p,size_t len)2124 bool isKeyword(utf8_t *p, size_t len)
2125 {
2126     static const char *table[] = { "true", "false", "null", NULL };
2127 
2128     for (int i = 0; table[i]; i++)
2129     {
2130         if (cmp(table[i], p, len) == 0)
2131             return true;
2132     }
2133     return false;
2134 }
2135 
2136 /****************************************************
2137  */
2138 
isTypeFunction(Dsymbol * s)2139 TypeFunction *isTypeFunction(Dsymbol *s)
2140 {
2141     FuncDeclaration *f = s->isFuncDeclaration();
2142 
2143     /* f->type may be NULL for template members.
2144      */
2145     if (f && f->type)
2146     {
2147         Type *t = f->originalType ? f->originalType : f->type;
2148         if (t->ty == Tfunction)
2149             return (TypeFunction *)t;
2150     }
2151     return NULL;
2152 }
2153 
2154 /****************************************************
2155  */
2156 
isFunctionParameter(Dsymbols * a,const utf8_t * p,size_t len)2157 Parameter *isFunctionParameter(Dsymbols *a, const utf8_t *p, size_t len)
2158 {
2159     for (size_t i = 0; i < a->dim; i++)
2160     {
2161         Parameter *fparam = isFunctionParameter((*a)[i], p, len);
2162         if (fparam)
2163         {
2164             return fparam;
2165         }
2166     }
2167     return NULL;
2168 }
2169 
2170 /****************************************************
2171  */
2172 
isTemplateParameter(Dsymbols * a,const utf8_t * p,size_t len)2173 TemplateParameter *isTemplateParameter(Dsymbols *a, const utf8_t *p, size_t len)
2174 {
2175     for (size_t i = 0; i < a->dim; i++)
2176     {
2177         TemplateDeclaration *td = (*a)[i]->isTemplateDeclaration();
2178         // Check for the parent, if the current symbol is not a template declaration.
2179         if (!td)
2180             td = getEponymousParent((*a)[i]);
2181         if (td && td->origParameters)
2182         {
2183             for (size_t k = 0; k < td->origParameters->dim; k++)
2184             {
2185                 TemplateParameter *tp = (*td->origParameters)[k];
2186                 if (tp->ident && cmp(tp->ident->toChars(), p, len) == 0)
2187                 {
2188                     return tp;
2189                 }
2190             }
2191         }
2192     }
2193     return NULL;
2194 }
2195 
2196 /****************************************************
2197  * Return true if str is a reserved symbol name
2198  * that starts with a double underscore.
2199  */
2200 
isReservedName(utf8_t * str,size_t len)2201 bool isReservedName(utf8_t *str, size_t len)
2202 {
2203     static const char *table[] = {
2204         "__ctor", "__dtor", "__postblit", "__invariant", "__unitTest",
2205         "__require", "__ensure", "__dollar", "__ctfe", "__withSym", "__result",
2206         "__returnLabel", "__vptr", "__monitor", "__gate", "__xopEquals", "__xopCmp",
2207         "__LINE__", "__FILE__", "__MODULE__", "__FUNCTION__", "__PRETTY_FUNCTION__",
2208         "__DATE__", "__TIME__", "__TIMESTAMP__", "__VENDOR__", "__VERSION__",
2209         "__EOF__", "__LOCAL_SIZE", "___tls_get_addr", "__entrypoint", NULL };
2210 
2211     for (int i = 0; table[i]; i++)
2212     {
2213         if (cmp(table[i], str, len) == 0)
2214             return true;
2215     }
2216     return false;
2217 }
2218 
2219 /**************************************************
2220  * Highlight text section.
2221  */
2222 
highlightText(Scope * sc,Dsymbols * a,OutBuffer * buf,size_t offset)2223 void highlightText(Scope *sc, Dsymbols *a, OutBuffer *buf, size_t offset)
2224 {
2225     Dsymbol *s = a->dim ? (*a)[0] : NULL;   // test
2226 
2227     //printf("highlightText()\n");
2228 
2229     int leadingBlank = 1;
2230     int inCode = 0;
2231     int inBacktick = 0;
2232     //int inComment = 0;                  // in <!-- ... --> comment
2233     size_t iCodeStart = 0;                    // start of code section
2234     size_t codeIndent = 0;
2235 
2236     size_t iLineStart = offset;
2237 
2238     for (size_t i = offset; i < buf->offset; i++)
2239     {
2240         utf8_t c = buf->data[i];
2241 
2242      Lcont:
2243         switch (c)
2244         {
2245             case ' ':
2246             case '\t':
2247                 break;
2248 
2249             case '\n':
2250                 if (inBacktick)
2251                 {
2252                     // `inline code` is only valid if contained on a single line
2253                     // otherwise, the backticks should be output literally.
2254                     //
2255                     // This lets things like `output from the linker' display
2256                     // unmolested while keeping the feature consistent with GitHub.
2257 
2258                     inBacktick = false;
2259                     inCode = false; // the backtick also assumes we're in code
2260 
2261                     // Nothing else is necessary since the DDOC_BACKQUOTED macro is
2262                     // inserted lazily at the close quote, meaning the rest of the
2263                     // text is already OK.
2264                 }
2265 
2266                 if (!sc->_module->isDocFile &&
2267                     !inCode && i == iLineStart && i + 1 < buf->offset)    // if "\n\n"
2268                 {
2269                     static const char blankline[] = "$(DDOC_BLANKLINE)\n";
2270 
2271                     i = buf->insert(i, blankline, strlen(blankline));
2272                 }
2273                 leadingBlank = 1;
2274                 iLineStart = i + 1;
2275                 break;
2276 
2277             case '<':
2278             {
2279                 leadingBlank = 0;
2280                 if (inCode)
2281                     break;
2282                 utf8_t *p = (utf8_t *)&buf->data[i];
2283                 const char *se = sc->_module->escapetable->escapeChar('<');
2284                 if (se && strcmp(se, "&lt;") == 0)
2285                 {
2286                     // Generating HTML
2287                     // Skip over comments
2288                     if (p[1] == '!' && p[2] == '-' && p[3] == '-')
2289                     {
2290                         size_t j = i + 4;
2291                         p += 4;
2292                         while (1)
2293                         {
2294                             if (j == buf->offset)
2295                                 goto L1;
2296                             if (p[0] == '-' && p[1] == '-' && p[2] == '>')
2297                             {
2298                                 i = j + 2;  // place on closing '>'
2299                                 break;
2300                             }
2301                             j++;
2302                             p++;
2303                         }
2304                         break;
2305                     }
2306 
2307                     // Skip over HTML tag
2308                     if (isalpha(p[1]) || (p[1] == '/' && isalpha(p[2])))
2309                     {
2310                         size_t j = i + 2;
2311                         p += 2;
2312                         while (1)
2313                         {
2314                             if (j == buf->offset)
2315                                 break;
2316                             if (p[0] == '>')
2317                             {
2318                                 i = j;      // place on closing '>'
2319                                 break;
2320                             }
2321                             j++;
2322                             p++;
2323                         }
2324                         break;
2325                     }
2326                 }
2327             L1:
2328                 // Replace '<' with '&lt;' character entity
2329                 if (se)
2330                 {
2331                     size_t len = strlen(se);
2332                     buf->remove(i, 1);
2333                     i = buf->insert(i, se, len);
2334                     i--;        // point to ';'
2335                 }
2336                 break;
2337             }
2338             case '>':
2339             {
2340                 leadingBlank = 0;
2341                 if (inCode)
2342                     break;
2343                 // Replace '>' with '&gt;' character entity
2344                 const char *se = sc->_module->escapetable->escapeChar('>');
2345                 if (se)
2346                 {
2347                     size_t len = strlen(se);
2348                     buf->remove(i, 1);
2349                     i = buf->insert(i, se, len);
2350                     i--;        // point to ';'
2351                 }
2352                 break;
2353             }
2354             case '&':
2355             {
2356                 leadingBlank = 0;
2357                 if (inCode)
2358                     break;
2359                 utf8_t *p = (utf8_t *)&buf->data[i];
2360                 if (p[1] == '#' || isalpha(p[1]))
2361                     break;                      // already a character entity
2362                 // Replace '&' with '&amp;' character entity
2363                 const char *se = sc->_module->escapetable->escapeChar('&');
2364                 if (se)
2365                 {
2366                     size_t len = strlen(se);
2367                     buf->remove(i, 1);
2368                     i = buf->insert(i, se, len);
2369                     i--;        // point to ';'
2370                 }
2371                 break;
2372             }
2373             case '`':
2374             {
2375                 if (inBacktick)
2376                 {
2377                     inBacktick = 0;
2378                     inCode = 0;
2379 
2380                     OutBuffer codebuf;
2381 
2382                     codebuf.write(buf->data + iCodeStart + 1, i - (iCodeStart + 1));
2383 
2384                     // escape the contents, but do not perform highlighting except for DDOC_PSYMBOL
2385                     highlightCode(sc, a, &codebuf, 0);
2386 
2387                     buf->remove(iCodeStart, i - iCodeStart + 1); // also trimming off the current `
2388 
2389                     static const char pre[] = "$(DDOC_BACKQUOTED ";
2390                     i = buf->insert(iCodeStart, pre, strlen(pre));
2391                     i = buf->insert(i, (char *)codebuf.data, codebuf.offset);
2392                     i = buf->insert(i, ")", 1);
2393 
2394                     i--; // point to the ending ) so when the for loop does i++, it will see the next character
2395 
2396                     break;
2397                 }
2398 
2399                 if (inCode)
2400                     break;
2401 
2402                 inCode = 1;
2403                 inBacktick = 1;
2404                 codeIndent = 0; // inline code is not indented
2405 
2406                 // All we do here is set the code flags and record
2407                 // the location. The macro will be inserted lazily
2408                 // so we can easily cancel the inBacktick if we come
2409                 // across a newline character.
2410                 iCodeStart = i;
2411 
2412                 break;
2413             }
2414             case '-':
2415                 /* A line beginning with --- delimits a code section.
2416                  * inCode tells us if it is start or end of a code section.
2417                  */
2418                 if (leadingBlank)
2419                 {
2420                     size_t istart = i;
2421                     size_t eollen = 0;
2422 
2423                     leadingBlank = 0;
2424                     while (1)
2425                     {
2426                         ++i;
2427                         if (i >= buf->offset)
2428                             break;
2429                         c = buf->data[i];
2430                         if (c == '\n')
2431                         {
2432                             eollen = 1;
2433                             break;
2434                         }
2435                         if (c == '\r')
2436                         {
2437                             eollen = 1;
2438                             if (i + 1 >= buf->offset)
2439                                 break;
2440                             if (buf->data[i + 1] == '\n')
2441                             {
2442                                 eollen = 2;
2443                                 break;
2444                             }
2445                         }
2446                         // BUG: handle UTF PS and LS too
2447                         if (c != '-')
2448                             goto Lcont;
2449                     }
2450                     if (i - istart < 3)
2451                         goto Lcont;
2452 
2453                     // We have the start/end of a code section
2454 
2455                     // Remove the entire --- line, including blanks and \n
2456                     buf->remove(iLineStart, i - iLineStart + eollen);
2457                     i = iLineStart;
2458 
2459                     if (inCode && (i <= iCodeStart))
2460                     {
2461                         // Empty code section, just remove it completely.
2462                         inCode = 0;
2463                         break;
2464                     }
2465 
2466                     if (inCode)
2467                     {
2468                         inCode = 0;
2469                         // The code section is from iCodeStart to i
2470                         OutBuffer codebuf;
2471 
2472                         codebuf.write(buf->data + iCodeStart, i - iCodeStart);
2473                         codebuf.writeByte(0);
2474 
2475                         // Remove leading indentations from all lines
2476                         bool lineStart = true;
2477                         utf8_t *endp = (utf8_t *)codebuf.data + codebuf.offset;
2478                         for (utf8_t *p = (utf8_t *)codebuf.data; p < endp; )
2479                         {
2480                             if (lineStart)
2481                             {
2482                                 size_t j = codeIndent;
2483                                 utf8_t *q = p;
2484                                 while (j-- > 0 && q < endp && isIndentWS(q))
2485                                     ++q;
2486                                 codebuf.remove(p - (utf8_t *)codebuf.data, q - p);
2487                                 assert((utf8_t *)codebuf.data <= p);
2488                                 assert(p < (utf8_t *)codebuf.data + codebuf.offset);
2489                                 lineStart = false;
2490                                 endp = (utf8_t *)codebuf.data + codebuf.offset; // update
2491                                 continue;
2492                             }
2493                             if (*p == '\n')
2494                                 lineStart = true;
2495                             ++p;
2496                         }
2497 
2498                         highlightCode2(sc, a, &codebuf, 0);
2499                         buf->remove(iCodeStart, i - iCodeStart);
2500                         i = buf->insert(iCodeStart, codebuf.data, codebuf.offset);
2501                         i = buf->insert(i, (const char *)")\n", 2);
2502                         i -= 2; // in next loop, c should be '\n'
2503                     }
2504                     else
2505                     {
2506                         static const char d_code[] = "$(D_CODE ";
2507 
2508                         inCode = 1;
2509                         codeIndent = istart - iLineStart;  // save indent count
2510                         i = buf->insert(i, d_code, strlen(d_code));
2511                         iCodeStart = i;
2512                         i--;            // place i on >
2513                         leadingBlank = true;
2514                     }
2515                 }
2516                 break;
2517 
2518             default:
2519                 leadingBlank = 0;
2520                 if (sc->_module->isDocFile || inCode)
2521                     break;
2522 
2523                 utf8_t *start = (utf8_t *)buf->data + i;
2524                 if (isIdStart(start))
2525                 {
2526                     size_t j = skippastident(buf, i);
2527                     if (i < j)
2528                     {
2529                         size_t k = skippastURL(buf, i);
2530                         if (i < k)
2531                         {
2532                             i = k - 1;
2533                             break;
2534                         }
2535                     }
2536                     else
2537                         break;
2538                     size_t len = j - i;
2539 
2540                     // leading '_' means no highlight unless it's a reserved symbol name
2541                     if (c == '_' &&
2542                         (i == 0 || !isdigit(*(start - 1))) &&
2543                         (i == buf->offset - 1 || !isReservedName(start, len)))
2544                     {
2545                         buf->remove(i, 1);
2546                         i = j - 1;
2547                         break;
2548                     }
2549                     if (isIdentifier(a, start, len))
2550                     {
2551                         i = buf->bracket(i, "$(DDOC_PSYMBOL ", j, ")") - 1;
2552                         break;
2553                     }
2554                     if (isKeyword(start, len))
2555                     {
2556                         i = buf->bracket(i, "$(DDOC_KEYWORD ", j, ")") - 1;
2557                         break;
2558                     }
2559                     if (isFunctionParameter(a, start, len))
2560                     {
2561                         //printf("highlighting arg '%s', i = %d, j = %d\n", arg->ident->toChars(), i, j);
2562                         i = buf->bracket(i, "$(DDOC_PARAM ", j, ")") - 1;
2563                         break;
2564                     }
2565 
2566                     i = j - 1;
2567                 }
2568                 break;
2569         }
2570     }
2571     if (inCode)
2572         error(s ? s->loc : Loc(), "unmatched --- in DDoc comment");
2573 }
2574 
2575 /**************************************************
2576  * Highlight code for DDOC section.
2577  */
2578 
highlightCode(Scope * sc,Dsymbol * s,OutBuffer * buf,size_t offset)2579 void highlightCode(Scope *sc, Dsymbol *s, OutBuffer *buf, size_t offset)
2580 {
2581     //printf("highlightCode(s = %s '%s')\n", s->kind(), s->toChars());
2582     OutBuffer ancbuf;
2583     emitAnchor(&ancbuf, s, sc);
2584     buf->insert(offset, (char *)ancbuf.data, ancbuf.offset);
2585     offset += ancbuf.offset;
2586 
2587     Dsymbols a;
2588     a.push(s);
2589     highlightCode(sc, &a, buf, offset);
2590 }
2591 
2592 /****************************************************
2593  */
2594 
highlightCode(Scope * sc,Dsymbols * a,OutBuffer * buf,size_t offset)2595 void highlightCode(Scope *sc, Dsymbols *a, OutBuffer *buf, size_t offset)
2596 {
2597     //printf("highlightCode(a = '%s')\n", a->toChars());
2598 
2599     for (size_t i = offset; i < buf->offset; i++)
2600     {
2601         utf8_t c = buf->data[i];
2602         const char *se = sc->_module->escapetable->escapeChar(c);
2603         if (se)
2604         {
2605             size_t len = strlen(se);
2606             buf->remove(i, 1);
2607             i = buf->insert(i, se, len);
2608             i--;                // point to ';'
2609             continue;
2610         }
2611 
2612         utf8_t *start = (utf8_t *)buf->data + i;
2613         if (isIdStart(start))
2614         {
2615             size_t j = skippastident(buf, i);
2616             if (i < j)
2617             {
2618                 size_t len = j - i;
2619                 if (isIdentifier(a, start, len))
2620                 {
2621                     i = buf->bracket(i, "$(DDOC_PSYMBOL ", j, ")") - 1;
2622                     continue;
2623                 }
2624                 if (isFunctionParameter(a, start, len))
2625                 {
2626                     //printf("highlighting arg '%s', i = %d, j = %d\n", arg->ident->toChars(), i, j);
2627                     i = buf->bracket(i, "$(DDOC_PARAM ", j, ")") - 1;
2628                     continue;
2629                 }
2630                 i = j - 1;
2631             }
2632         }
2633     }
2634 }
2635 
2636 /****************************************
2637  */
2638 
highlightCode3(Scope * sc,OutBuffer * buf,const utf8_t * p,const utf8_t * pend)2639 void highlightCode3(Scope *sc, OutBuffer *buf, const utf8_t *p, const utf8_t *pend)
2640 {
2641     for (; p < pend; p++)
2642     {
2643         const char *s = sc->_module->escapetable->escapeChar(*p);
2644         if (s)
2645             buf->writestring(s);
2646         else
2647             buf->writeByte(*p);
2648     }
2649 }
2650 
2651 /**************************************************
2652  * Highlight code for CODE section.
2653  */
2654 
highlightCode2(Scope * sc,Dsymbols * a,OutBuffer * buf,size_t offset)2655 void highlightCode2(Scope *sc, Dsymbols *a, OutBuffer *buf, size_t offset)
2656 {
2657     unsigned errorsave = global.errors;
2658     Lexer lex(NULL, (utf8_t *)buf->data, 0, buf->offset - 1, 0, 1);
2659     OutBuffer res;
2660     const utf8_t *lastp = (utf8_t *)buf->data;
2661 
2662     //printf("highlightCode2('%.*s')\n", buf->offset - 1, buf->data);
2663     res.reserve(buf->offset);
2664     while (1)
2665     {
2666         Token tok;
2667         lex.scan(&tok);
2668         highlightCode3(sc, &res, lastp, tok.ptr);
2669 
2670         const char *highlight = NULL;
2671         switch (tok.value)
2672         {
2673             case TOKidentifier:
2674             {
2675                 if (!sc)
2676                     break;
2677                 size_t len = lex.p - tok.ptr;
2678                 if (isIdentifier(a, tok.ptr, len))
2679                 {
2680                     highlight = "$(D_PSYMBOL ";
2681                     break;
2682                 }
2683                 if (isFunctionParameter(a, tok.ptr, len))
2684                 {
2685                     //printf("highlighting arg '%s', i = %d, j = %d\n", arg->ident->toChars(), i, j);
2686                     highlight = "$(D_PARAM ";
2687                     break;
2688                 }
2689                 break;
2690             }
2691             case TOKcomment:
2692                 highlight = "$(D_COMMENT ";
2693                 break;
2694 
2695             case TOKstring:
2696                 highlight = "$(D_STRING ";
2697                 break;
2698 
2699             default:
2700                 if (tok.isKeyword())
2701                     highlight = "$(D_KEYWORD ";
2702                 break;
2703         }
2704         if (highlight)
2705         {
2706             res.writestring(highlight);
2707             size_t o = res.offset;
2708             highlightCode3(sc, &res, tok.ptr, lex.p);
2709             if (tok.value == TOKcomment || tok.value == TOKstring)
2710                 escapeDdocString(&res, o);  // Bugzilla 7656, 7715, and 10519
2711             res.writeByte(')');
2712         }
2713         else
2714             highlightCode3(sc, &res, tok.ptr, lex.p);
2715         if (tok.value == TOKeof)
2716             break;
2717         lastp = lex.p;
2718     }
2719     buf->setsize(offset);
2720     buf->write(&res);
2721     global.errors = errorsave;
2722 }
2723 
2724 /***************************************
2725  * Find character string to replace c with.
2726  */
2727 
escapeChar(unsigned c)2728 const char *Escape::escapeChar(unsigned c)
2729 {
2730     assert(c < 256);
2731     //printf("escapeChar('%c') => %p, %p\n", c, strings, strings[c]);
2732     return strings[c];
2733 }
2734 
2735 /****************************************
2736  * Determine if p points to the start of a "..." parameter identifier.
2737  */
2738 
isCVariadicArg(const utf8_t * p,size_t len)2739 bool isCVariadicArg(const utf8_t *p, size_t len)
2740 {
2741     return len >= 3 && cmp("...", p, 3) == 0;
2742 }
2743 
2744 /****************************************
2745  * Determine if p points to the start of an identifier.
2746  */
2747 
isIdStart(const utf8_t * p)2748 bool isIdStart(const utf8_t *p)
2749 {
2750     unsigned c = *p;
2751     if (isalpha(c) || c == '_')
2752         return true;
2753     if (c >= 0x80)
2754     {
2755         size_t i = 0;
2756         if (utf_decodeChar(p, 4, &i, &c))
2757             return false;   // ignore errors
2758         if (isUniAlpha(c))
2759             return true;
2760     }
2761     return false;
2762 }
2763 
2764 /****************************************
2765  * Determine if p points to the rest of an identifier.
2766  */
2767 
isIdTail(const utf8_t * p)2768 bool isIdTail(const utf8_t *p)
2769 {
2770     unsigned c = *p;
2771     if (isalnum(c) || c == '_')
2772         return true;
2773     if (c >= 0x80)
2774     {
2775         size_t i = 0;
2776         if (utf_decodeChar(p, 4, &i, &c))
2777             return false;   // ignore errors
2778         if (isUniAlpha(c))
2779             return true;
2780     }
2781     return false;
2782 }
2783 
2784 /****************************************
2785  * Determine if p points to the indentation space.
2786  */
2787 
isIndentWS(const utf8_t * p)2788 bool isIndentWS(const utf8_t *p)
2789 {
2790     return (*p == ' ') || (*p == '\t');
2791 }
2792 
2793 /*****************************************
2794  * Return number of bytes in UTF character.
2795  */
2796 
utfStride(const utf8_t * p)2797 int utfStride(const utf8_t *p)
2798 {
2799     unsigned c = *p;
2800     if (c < 0x80)
2801         return 1;
2802     size_t i = 0;
2803     utf_decodeChar(p, 4, &i, &c);       // ignore errors, but still consume input
2804     return (int)i;
2805 }
2806