1 /* -----------------------------------------------------------------------------
2  * This file is part of SWIG, which is licensed as a whole under version 3
3  * (or any later version) of the GNU General Public License. Some additional
4  * terms also apply to certain portions of SWIG. The full details of the SWIG
5  * license and copyrights can be found in the LICENSE and COPYRIGHT files
6  * included with the SWIG source code as distributed by the SWIG developers
7  * and at http://www.swig.org/legal.html.
8  *
9  * doxyparser.h
10  * ----------------------------------------------------------------------------- */
11 
12 #ifndef DOXYGENPARSER_H_
13 #define DOXYGENPARSER_H_
14 #include <string>
15 #include <list>
16 #include <map>
17 #include <vector>
18 #include <set>
19 
20 #include "swig.h"
21 
22 #include "doxyentity.h"
23 
/**
 * Utility function returning the base part of a command that may include
 * options, i.e. the command name without its option part.
 *
 * Example: param[in] -> param
 *
 * @param cmd command text, possibly including options.
 * @return the base part of cmd.
 */
std::string getBaseCommand(const std::string &cmd);
27 
28 
29 class DoxygenParser {
30 private:
31 
32   enum DoxyCommandEnum {
33     NONE = -1,
34     SIMPLECOMMAND,
35     COMMANDWORD,
36     COMMANDLINE,
37     COMMANDPARAGRAPH,
38     COMMANDENDCOMMAND,
39     COMMANDWORDPARAGRAPH,
40     COMMANDWORDLINE,
41     COMMANDWORDOWORDWORD,
42     COMMANDOWORD,
43     COMMANDERRORTHROW,
44     COMMANDUNIQUE,
45     COMMAND_HTML,
46     COMMAND_HTML_ENTITY,
47     COMMAND_ALIAS,
48     COMMAND_IGNORE,
49     END_LINE,
50     PARAGRAPH_END,
51     PLAINSTRING,
52     COMMAND
53   };
54 
55 
56   /** This class contains parts of Doxygen comment as a token. */
57   class Token {
58   public:
59     DoxyCommandEnum m_tokenType;
60     std::string m_tokenString; /* the data , such as param for @param */
61 
Token(DoxyCommandEnum tType,std::string tString)62     Token(DoxyCommandEnum tType, std::string tString) : m_tokenType(tType), m_tokenString(tString) {
63     }
64 
toString()65     std::string toString() const {
66       switch (m_tokenType) {
67       case END_LINE:
68         return "{END OF LINE}";
69       case PARAGRAPH_END:
70         return "{END OF PARAGRAPH}";
71       case PLAINSTRING:
72         return "{PLAINSTRING :" + m_tokenString + "}";
73       case COMMAND:
74         return "{COMMAND : " + m_tokenString + "}";
75       default:
76         return "";
77       }
78     }
79   };
80 
81 
82   typedef std::vector<Token> TokenList;
83   typedef TokenList::const_iterator TokenListCIt;
84   typedef TokenList::iterator TokenListIt;
85 
86   TokenList m_tokenList;
87   TokenListCIt m_tokenListIt;
88 
89   typedef std::map<std::string, DoxyCommandEnum> DoxyCommandsMap;
90   typedef DoxyCommandsMap::iterator DoxyCommandsMapIt;
91 
92   /*
93    * Map of Doxygen commands to determine if a string is a
94    * command and how it needs to be parsed
95    */
96   static DoxyCommandsMap doxygenCommands;
97   static std::set<std::string> doxygenSectionIndicators;
98 
99   bool m_isVerbatimText; // used to handle \htmlonly and \verbatim commands
100   bool m_isInQuotedString;
101 
102   Node *m_node;
103   std::string m_fileName;
104   int m_fileLineNo;
105 
106   /*
107    * Return the end command for a command appearing in "ignore" feature or empty
108    * string if this is a simple command and not a block one.
109    */
110   std::string getIgnoreFeatureEndCommand(const std::string &theCommand) const;
111 
112   /*
113    * Helper for getting the value of doxygen:ignore feature or its argument.
114    */
115   String *getIgnoreFeature(const std::string &theCommand, const char *argument = NULL) const;
116 
117   /*
118    * Whether to print lots of debug info during parsing
119    */
120   bool noisy;
121 
122   /*
123    *Changes a std::string to all lower case
124    */
125   std::string stringToLower(const std::string &stringToConvert);
126 
127   /*
128    * isSectionIndicator returns a boolean if the command is a section indicator
129    * This is a helper method for finding the end of a paragraph
130    * by Doxygen's terms
131    */
132   bool isSectionIndicator(const std::string &smallString);
133   /*
134    * Determines how a command should be handled (what group it belongs to
135    * for parsing rules
136    */
137   DoxyCommandEnum commandBelongs(const std::string &theCommand);
138 
139   /*
140    *prints the parse tree
141    */
142   void printTree(const std::list<DoxygenEntity> &rootList);
143 
144   /**
145    * Returns true if the next token is end of line token. This is important
146    * when single word commands like \c are at the end of line.
147    */
148   bool isEndOfLine();
149 
150   /**
151    * Skips spaces, tabs, and end of line tokens.
152    */
153   void skipWhitespaceTokens();
154 
155   /**
156    * Removes all spaces and tabs from beginning end end of string.
157    */
158   std::string trim(const std::string &text);
159 
160   /*
161    * Returns string of the next token if the next token is PLAINSTRING. Returns
162    * empty string otherwise.
163    */
164   std::string getNextToken();
165 
166   /*
167    * Returns the next word ON THE CURRENT LINE ONLY
168    * if a new line is encountered, returns a blank std::string.
169    * Updates the iterator if successful.
170    */
171   std::string getNextWord();
172 
173   /*
174    * Returns the next word, which is not necessarily on the same line.
175    * Updates the iterator if successful.
176    */
177   std::string getNextWordInComment();
178 
179   /*
180    * Returns the location of the end of the line as
181    * an iterator.
182    */
183   TokenListCIt getOneLine(const TokenList &tokList);
184 
185   /*
186    * Returns a properly formatted std::string
187    * up til ANY command or end of line is encountered.
188    */
189   std::string getStringTilCommand(const TokenList &tokList);
190 
191   /*
192    * Returns a properly formatted std::string
193    * up til the command specified is encountered
194    */
195   //TODO check that this behaves properly for formulas
196   std::string getStringTilEndCommand(const std::string &theCommand, const TokenList &tokList);
197 
198   /*
199    * Returns the end of a Paragraph as an iterator-
200    * Paragraph is defined in Doxygen to be a paragraph of text
201    * separated by either a structural command or a blank line
202    */
203   TokenListCIt getEndOfParagraph(const TokenList &tokList);
204 
205   /*
206    * Returns the end of a section, defined as the first blank line OR first
207    * encounter of the same command. Example of this behaviour is \arg.
208    * If no end is encountered, returns the last token of the std::list.
209    */
210   TokenListCIt getEndOfSection(const std::string &theCommand, const TokenList &tokList);
211 
212   /*
213    * This method is for returning the end of a specific form of doxygen command
214    * that begins with a \command and ends in \endcommand
215    * such as \code and \endcode. The proper usage is
216    * progressTilEndCommand("endcode", tokenList);
217    * If the end is never encountered, it returns the end of the std::list.
218    */
219   TokenListCIt getEndCommand(const std::string &theCommand, const TokenList &tokList);
220   /*
221    * A special method for commands such as \arg that end at the end of a
222    * paragraph OR when another \arg is encountered
223   //TODO getTilAnyCommand
224   TokenListCIt getTilAnyCommand(const std::string &theCommand, const TokenList &tokList);
225    */
226 
227   /**
228    * This methods skips end of line token, if it is the next token to be
229    * processed. It is called with comment commands which have args till the
230    * end of line, such as 'addtogroup' or 'addindex'.
231    * It is up to translator to specific language to decide whether
232    * to insert eol or not. For example, if a command is ignored in target
233    * language, new lines may make formatting ugly (Python).
234    */
235   void skipEndOfLine();
236 
237   /*
238    * Method for Adding a Simple Command
239    * Format: @command
240    * Plain commands, such as newline etc, they contain no other data
241    *  \n \\ \@ \& \$ \# \< \> \%
242    */
243   void addSimpleCommand(const std::string &theCommand, DoxygenEntityList &doxyList);
244   /*
245    * CommandWord
246    * Format: @command <word>
247    * Commands with a single WORD after then such as @b
248    * "a", "b", "c", "e", "em", "p", "def", "enum", "example", "package",
249    * "relates", "namespace", "relatesalso","anchor", "dontinclude", "include",
250    * "includelineno"
251    */
252   void addCommandWord(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
253   /*
254    * CommandLine
255    * Format: @command (line)
256    * Commands with a single LINE after then such as @var
257    * "addindex", "fn", "name", "line", "var", "skipline", "typedef", "skip",
258    * "until", "property"
259    */
260   void addCommandLine(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
261   /*
262    * CommandParagraph
263    * Format: @command {paragraph}
264    * Commands with a single paragraph after then such as @return
265    * "return", "remarks", "since", "test", "sa", "see", "pre", "post",
266    * "details", "invariant", "deprecated", "date", "note", "warning",
267    * "version", "todo", "bug", "attention", "brief", "arg", "author"
268    */
269   void addCommandParagraph(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
270   /*
271    * Command EndCommand
272    * Format: @command and ends at @endcommand
273    * Commands that take in a block of text such as @code:
274    * "code", "dot", "msc", "f$", "f[", "f{environment}{", "htmlonly",
275    * "latexonly", "manonly", "verbatim", "xmlonly", "cond", "if", "ifnot",
276    * "link"
277    * Returns 1 if success, 0 if the endcommand is never encountered.
278    */
279   void addCommandEndCommand(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
280   /*
281    * CommandWordParagraph
282    * Format: @command <word> {paragraph}
283    * Commands such as param
284    * "param", "tparam", "throw", "throws", "retval", "exception"
285    */
286   void addCommandWordParagraph(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
287   /*
288    * CommandWordLine
289    * Format: @command <word> (line)
290    * Commands such as param
291    * "page", "subsection", "subsubsection", "section", "paragraph", "defgroup"
292    */
293   void addCommandWordLine(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
294   /*
295    * Command Word Optional Word Optional Word
296    * Format: @command <word> [<header-file>] [<header-name>]
297    * Commands such as class
298    * "category", "class", "protocol", "interface", "struct", "union"
299    */
300   void addCommandWordOWordOWord(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
301   /*
302    * Command Optional Word
303    * Format: @command [<word>]
304    * Commands such as dir
305    * "dir", "file", "cond"
306    */
307   void addCommandOWord(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
308 
309   /*
310    * Commands that should not be encountered (such as PHP only)
311    * goes til the end of line then returns
312    */
313   void addCommandErrorThrow(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
314 
315   void addCommandHtml(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
316 
317   void addCommandHtmlEntity(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
318 
319   /*
320    *Adds the unique commands- different process for each unique command
321    */
322   void addCommandUnique(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
323 
324   /*
325    * Replace the given command with its predefined alias expansion.
326    */
327   void aliasCommand(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
328 
329   /*
330    * Simply ignore the given command, possibly with the word following it or
331    * until the matching end command.
332    */
333   void ignoreCommand(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
334 
335   /*
336    * The actual "meat" of the doxygen parser. Calls the correct addCommand...()
337    * function.
338    */
339   void addCommand(const std::string &commandString, const TokenList &tokList, DoxygenEntityList &doxyList);
340 
341   DoxygenEntityList parse(TokenListCIt endParsingIndex, const TokenList &tokList, bool root = false);
342 
343   /*
344    * Fill static doxygenCommands and sectionIndicators containers
345    */
346   void fillTables();
347 
348   /** Processes comment when \htmlonly and \verbatim commands are encountered. */
349   size_t processVerbatimText(size_t pos, const std::string &line);
350 
351   bool processEscapedChars(size_t &pos, const std::string &line);
352   void processWordCommands(size_t &pos, const std::string &line);
353   void processHtmlTags(size_t &pos, const std::string &line);
354   void processHtmlEntities(size_t &pos, const std::string &line);
355 
356 
357   /** Processes comment outside \htmlonly and \verbatim commands. */
358   size_t processNormalComment(size_t pos, const std::string &line);
359 
360   void tokenizeDoxygenComment(const std::string &doxygenComment, const std::string &fileName, int fileLine);
361   void printList();
362   void printListError(int warningType, const std::string &message);
363 
364   typedef std::vector<std::string> StringVector;
365   typedef StringVector::const_iterator StringVectorCIt;
366 
367   StringVector split(const std::string &text, char separator);
368   bool isStartOfDoxyCommentChar(char c);
369   bool addDoxyCommand(DoxygenParser::TokenList &tokList, const std::string &cmd);
370 
371 public:
372   DoxygenParser(bool noisy = false);
373   virtual ~DoxygenParser();
374   DoxygenEntityList createTree(Node *node, String *documentation);
375 };
376 
377 #endif
378