1 /* ----------------------------------------------------------------------------- 2 * This file is part of SWIG, which is licensed as a whole under version 3 3 * (or any later version) of the GNU General Public License. Some additional 4 * terms also apply to certain portions of SWIG. The full details of the SWIG 5 * license and copyrights can be found in the LICENSE and COPYRIGHT files 6 * included with the SWIG source code as distributed by the SWIG developers 7 * and at http://www.swig.org/legal.html. 8 * 9 * doxyparser.h 10 * ----------------------------------------------------------------------------- */ 11 12 #ifndef DOXYGENPARSER_H_ 13 #define DOXYGENPARSER_H_ 14 #include <string> 15 #include <list> 16 #include <map> 17 #include <vector> 18 #include <set> 19 20 #include "swig.h" 21 22 #include "doxyentity.h" 23 24 // Utility function to return the base part of a command that may 25 // include options, e.g. param[in] -> param 26 std::string getBaseCommand(const std::string &cmd); 27 28 29 class DoxygenParser { 30 private: 31 32 enum DoxyCommandEnum { 33 NONE = -1, 34 SIMPLECOMMAND, 35 COMMANDWORD, 36 COMMANDLINE, 37 COMMANDPARAGRAPH, 38 COMMANDENDCOMMAND, 39 COMMANDWORDPARAGRAPH, 40 COMMANDWORDLINE, 41 COMMANDWORDOWORDWORD, 42 COMMANDOWORD, 43 COMMANDERRORTHROW, 44 COMMANDUNIQUE, 45 COMMAND_HTML, 46 COMMAND_HTML_ENTITY, 47 COMMAND_ALIAS, 48 COMMAND_IGNORE, 49 END_LINE, 50 PARAGRAPH_END, 51 PLAINSTRING, 52 COMMAND 53 }; 54 55 56 /** This class contains parts of Doxygen comment as a token. */ 57 class Token { 58 public: 59 DoxyCommandEnum m_tokenType; 60 std::string m_tokenString; /* the data , such as param for @param */ 61 Token(DoxyCommandEnum tType,std::string tString)62 Token(DoxyCommandEnum tType, std::string tString) : m_tokenType(tType), m_tokenString(tString) { 63 } 64 toString()65 std::string toString() const { 66 switch (m_tokenType) { 67 case END_LINE: 68 return "{END OF LINE}"; 69 case PARAGRAPH_END: 70 return "{END OF PARAGRAPH}"; 71 case PLAINSTRING: 72 return "{PLAINSTRING :" + m_tokenString + "}"; 73 case COMMAND: 74 return "{COMMAND : " + m_tokenString + "}"; 75 default: 76 return ""; 77 } 78 } 79 }; 80 81 82 typedef std::vector<Token> TokenList; 83 typedef TokenList::const_iterator TokenListCIt; 84 typedef TokenList::iterator TokenListIt; 85 86 TokenList m_tokenList; 87 TokenListCIt m_tokenListIt; 88 89 typedef std::map<std::string, DoxyCommandEnum> DoxyCommandsMap; 90 typedef DoxyCommandsMap::iterator DoxyCommandsMapIt; 91 92 /* 93 * Map of Doxygen commands to determine if a string is a 94 * command and how it needs to be parsed 95 */ 96 static DoxyCommandsMap doxygenCommands; 97 static std::set<std::string> doxygenSectionIndicators; 98 99 bool m_isVerbatimText; // used to handle \htmlonly and \verbatim commands 100 bool m_isInQuotedString; 101 102 Node *m_node; 103 std::string m_fileName; 104 int m_fileLineNo; 105 106 /* 107 * Return the end command for a command appearing in "ignore" feature or empty 108 * string if this is a simple command and not a block one. 109 */ 110 std::string getIgnoreFeatureEndCommand(const std::string &theCommand) const; 111 112 /* 113 * Helper for getting the value of doxygen:ignore feature or its argument. 114 */ 115 String *getIgnoreFeature(const std::string &theCommand, const char *argument = NULL) const; 116 117 /* 118 * Whether to print lots of debug info during parsing 119 */ 120 bool noisy; 121 122 /* 123 *Changes a std::string to all lower case 124 */ 125 std::string stringToLower(const std::string &stringToConvert); 126 127 /* 128 * isSectionIndicator returns a boolean if the command is a section indicator 129 * This is a helper method for finding the end of a paragraph 130 * by Doxygen's terms 131 */ 132 bool isSectionIndicator(const std::string &smallString); 133 /* 134 * Determines how a command should be handled (what group it belongs to 135 * for parsing rules 136 */ 137 DoxyCommandEnum commandBelongs(const std::string &theCommand); 138 139 /* 140 *prints the parse tree 141 */ 142 void printTree(const std::list<DoxygenEntity> &rootList); 143 144 /** 145 * Returns true if the next token is end of line token. This is important 146 * when single word commands like \c are at the end of line. 147 */ 148 bool isEndOfLine(); 149 150 /** 151 * Skips spaces, tabs, and end of line tokens. 152 */ 153 void skipWhitespaceTokens(); 154 155 /** 156 * Removes all spaces and tabs from beginning end end of string. 157 */ 158 std::string trim(const std::string &text); 159 160 /* 161 * Returns string of the next token if the next token is PLAINSTRING. Returns 162 * empty string otherwise. 163 */ 164 std::string getNextToken(); 165 166 /* 167 * Returns the next word ON THE CURRENT LINE ONLY 168 * if a new line is encountered, returns a blank std::string. 169 * Updates the iterator if successful. 170 */ 171 std::string getNextWord(); 172 173 /* 174 * Returns the next word, which is not necessarily on the same line. 175 * Updates the iterator if successful. 176 */ 177 std::string getNextWordInComment(); 178 179 /* 180 * Returns the location of the end of the line as 181 * an iterator. 182 */ 183 TokenListCIt getOneLine(const TokenList &tokList); 184 185 /* 186 * Returns a properly formatted std::string 187 * up til ANY command or end of line is encountered. 188 */ 189 std::string getStringTilCommand(const TokenList &tokList); 190 191 /* 192 * Returns a properly formatted std::string 193 * up til the command specified is encountered 194 */ 195 //TODO check that this behaves properly for formulas 196 std::string getStringTilEndCommand(const std::string &theCommand, const TokenList &tokList); 197 198 /* 199 * Returns the end of a Paragraph as an iterator- 200 * Paragraph is defined in Doxygen to be a paragraph of text 201 * separated by either a structural command or a blank line 202 */ 203 TokenListCIt getEndOfParagraph(const TokenList &tokList); 204 205 /* 206 * Returns the end of a section, defined as the first blank line OR first 207 * encounter of the same command. Example of this behaviour is \arg. 208 * If no end is encountered, returns the last token of the std::list. 209 */ 210 TokenListCIt getEndOfSection(const std::string &theCommand, const TokenList &tokList); 211 212 /* 213 * This method is for returning the end of a specific form of doxygen command 214 * that begins with a \command and ends in \endcommand 215 * such as \code and \endcode. The proper usage is 216 * progressTilEndCommand("endcode", tokenList); 217 * If the end is never encountered, it returns the end of the std::list. 218 */ 219 TokenListCIt getEndCommand(const std::string &theCommand, const TokenList &tokList); 220 /* 221 * A special method for commands such as \arg that end at the end of a 222 * paragraph OR when another \arg is encountered 223 //TODO getTilAnyCommand 224 TokenListCIt getTilAnyCommand(const std::string &theCommand, const TokenList &tokList); 225 */ 226 227 /** 228 * This methods skips end of line token, if it is the next token to be 229 * processed. It is called with comment commands which have args till the 230 * end of line, such as 'addtogroup' or 'addindex'. 231 * It is up to translator to specific language to decide whether 232 * to insert eol or not. For example, if a command is ignored in target 233 * language, new lines may make formatting ugly (Python). 234 */ 235 void skipEndOfLine(); 236 237 /* 238 * Method for Adding a Simple Command 239 * Format: @command 240 * Plain commands, such as newline etc, they contain no other data 241 * \n \\ \@ \& \$ \# \< \> \% 242 */ 243 void addSimpleCommand(const std::string &theCommand, DoxygenEntityList &doxyList); 244 /* 245 * CommandWord 246 * Format: @command <word> 247 * Commands with a single WORD after then such as @b 248 * "a", "b", "c", "e", "em", "p", "def", "enum", "example", "package", 249 * "relates", "namespace", "relatesalso","anchor", "dontinclude", "include", 250 * "includelineno" 251 */ 252 void addCommandWord(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList); 253 /* 254 * CommandLine 255 * Format: @command (line) 256 * Commands with a single LINE after then such as @var 257 * "addindex", "fn", "name", "line", "var", "skipline", "typedef", "skip", 258 * "until", "property" 259 */ 260 void addCommandLine(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList); 261 /* 262 * CommandParagraph 263 * Format: @command {paragraph} 264 * Commands with a single paragraph after then such as @return 265 * "return", "remarks", "since", "test", "sa", "see", "pre", "post", 266 * "details", "invariant", "deprecated", "date", "note", "warning", 267 * "version", "todo", "bug", "attention", "brief", "arg", "author" 268 */ 269 void addCommandParagraph(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList); 270 /* 271 * Command EndCommand 272 * Format: @command and ends at @endcommand 273 * Commands that take in a block of text such as @code: 274 * "code", "dot", "msc", "f$", "f[", "f{environment}{", "htmlonly", 275 * "latexonly", "manonly", "verbatim", "xmlonly", "cond", "if", "ifnot", 276 * "link" 277 * Returns 1 if success, 0 if the endcommand is never encountered. 278 */ 279 void addCommandEndCommand(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList); 280 /* 281 * CommandWordParagraph 282 * Format: @command <word> {paragraph} 283 * Commands such as param 284 * "param", "tparam", "throw", "throws", "retval", "exception" 285 */ 286 void addCommandWordParagraph(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList); 287 /* 288 * CommandWordLine 289 * Format: @command <word> (line) 290 * Commands such as param 291 * "page", "subsection", "subsubsection", "section", "paragraph", "defgroup" 292 */ 293 void addCommandWordLine(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList); 294 /* 295 * Command Word Optional Word Optional Word 296 * Format: @command <word> [<header-file>] [<header-name>] 297 * Commands such as class 298 * "category", "class", "protocol", "interface", "struct", "union" 299 */ 300 void addCommandWordOWordOWord(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList); 301 /* 302 * Command Optional Word 303 * Format: @command [<word>] 304 * Commands such as dir 305 * "dir", "file", "cond" 306 */ 307 void addCommandOWord(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList); 308 309 /* 310 * Commands that should not be encountered (such as PHP only) 311 * goes til the end of line then returns 312 */ 313 void addCommandErrorThrow(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList); 314 315 void addCommandHtml(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList); 316 317 void addCommandHtmlEntity(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList); 318 319 /* 320 *Adds the unique commands- different process for each unique command 321 */ 322 void addCommandUnique(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList); 323 324 /* 325 * Replace the given command with its predefined alias expansion. 326 */ 327 void aliasCommand(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList); 328 329 /* 330 * Simply ignore the given command, possibly with the word following it or 331 * until the matching end command. 332 */ 333 void ignoreCommand(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList); 334 335 /* 336 * The actual "meat" of the doxygen parser. Calls the correct addCommand...() 337 * function. 338 */ 339 void addCommand(const std::string &commandString, const TokenList &tokList, DoxygenEntityList &doxyList); 340 341 DoxygenEntityList parse(TokenListCIt endParsingIndex, const TokenList &tokList, bool root = false); 342 343 /* 344 * Fill static doxygenCommands and sectionIndicators containers 345 */ 346 void fillTables(); 347 348 /** Processes comment when \htmlonly and \verbatim commands are encountered. */ 349 size_t processVerbatimText(size_t pos, const std::string &line); 350 351 bool processEscapedChars(size_t &pos, const std::string &line); 352 void processWordCommands(size_t &pos, const std::string &line); 353 void processHtmlTags(size_t &pos, const std::string &line); 354 void processHtmlEntities(size_t &pos, const std::string &line); 355 356 357 /** Processes comment outside \htmlonly and \verbatim commands. */ 358 size_t processNormalComment(size_t pos, const std::string &line); 359 360 void tokenizeDoxygenComment(const std::string &doxygenComment, const std::string &fileName, int fileLine); 361 void printList(); 362 void printListError(int warningType, const std::string &message); 363 364 typedef std::vector<std::string> StringVector; 365 typedef StringVector::const_iterator StringVectorCIt; 366 367 StringVector split(const std::string &text, char separator); 368 bool isStartOfDoxyCommentChar(char c); 369 bool addDoxyCommand(DoxygenParser::TokenList &tokList, const std::string &cmd); 370 371 public: 372 DoxygenParser(bool noisy = false); 373 virtual ~DoxygenParser(); 374 DoxygenEntityList createTree(Node *node, String *documentation); 375 }; 376 377 #endif 378