1 /* -----------------------------------------------------------------------------
2  * This file is part of SWIG, which is licensed as a whole under version 3
3  * (or any later version) of the GNU General Public License. Some additional
4  * terms also apply to certain portions of SWIG. The full details of the SWIG
5  * license and copyrights can be found in the LICENSE and COPYRIGHT files
6  * included with the SWIG source code as distributed by the SWIG developers
7  * and at http://www.swig.org/legal.html.
8  *
9  * doxyparser.cxx
10  * ----------------------------------------------------------------------------- */
11 
12 #include "doxyparser.h"
13 #include "doxycommands.h"
14 #include "swig.h"
15 #include "swigwarn.h"
16 
17 #include <iostream>
18 #include <algorithm>
19 #include <vector>
20 
21 using std::string;
22 using std::cout;
23 using std::endl;
24 
25 // This constant defines the (only) characters valid inside a Doxygen "word".
26 // It includes some unusual ones because of the commands such as \f[, \f{, \f],
27 // \f} and \f$.
28 static const char *DOXYGEN_WORD_CHARS = "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789" "$[]{}";
29 
30 // Define static class members
31 DoxygenParser::DoxyCommandsMap DoxygenParser::doxygenCommands;
32 std::set<std::string> DoxygenParser::doxygenSectionIndicators;
33 
34 const int TOKENSPERLINE = 8; //change this to change the printing behaviour of the token list
35 const std::string END_HTML_TAG_MARK("/");
36 
// Maps a command that may carry an inline option suffix onto its base name:
// anything starting with "param" (e.g. "param[in,out]") yields "param" and
// anything starting with "code" (e.g. "code{.py}") yields "code". Every other
// command is returned unchanged.
std::string getBaseCommand(const std::string &cmd) {
  // compare() clips the requested length to the string size, so commands
  // shorter than the prefix simply do not match.
  if (cmd.compare(0, 5, "param") == 0)
    return "param";
  if (cmd.compare(0, 4, "code") == 0)
    return "code";
  return cmd;
}
45 
46 // Find the first position beyond the word command.  Extra logic is
47 // used to avoid putting the characters "," and "." in
48 // DOXYGEN_WORD_CHARS.
getEndOfWordCommand(const std::string & line,size_t pos)49 static size_t getEndOfWordCommand(const std::string &line, size_t pos) {
50   size_t endOfWordPos = line.find_first_not_of(DOXYGEN_WORD_CHARS, pos);
51   if (line.substr(pos, 6) == "param[")
52     // include ",", which can appear in param[in,out]
53     endOfWordPos = line.find_first_not_of(string(DOXYGEN_WORD_CHARS)+ ",", pos);
54   else if (line.substr(pos, 5) == "code{")
55     // include ".", which can appear in e.g. code{.py}
56     endOfWordPos = line.find_first_not_of(string(DOXYGEN_WORD_CHARS)+ ".", pos);
57   return endOfWordPos;
58 }
59 
60 
DoxygenParser(bool noisy)61 DoxygenParser::DoxygenParser(bool noisy) : noisy(noisy) {
62   fillTables();
63 }
64 
~DoxygenParser()65 DoxygenParser::~DoxygenParser() {
66 }
67 
fillTables()68 void DoxygenParser::fillTables() {
69   // run it only once
70   if (doxygenCommands.size())
71     return;
72 
73   // fill in tables with data from doxycommands.h
74   for (int i = 0; i < simpleCommandsSize; i++)
75     doxygenCommands[simpleCommands[i]] = SIMPLECOMMAND;
76 
77   for (int i = 0; i < commandWordsSize; i++)
78     doxygenCommands[commandWords[i]] = COMMANDWORD;
79 
80   for (int i = 0; i < commandLinesSize; i++)
81     doxygenCommands[commandLines[i]] = COMMANDLINE;
82 
83   for (int i = 0; i < commandParagraphSize; i++)
84     doxygenCommands[commandParagraph[i]] = COMMANDPARAGRAPH;
85 
86   for (int i = 0; i < commandEndCommandsSize; i++)
87     doxygenCommands[commandEndCommands[i]] = COMMANDENDCOMMAND;
88 
89   for (int i = 0; i < commandWordParagraphsSize; i++)
90     doxygenCommands[commandWordParagraphs[i]] = COMMANDWORDPARAGRAPH;
91 
92   for (int i = 0; i < commandWordLinesSize; i++)
93     doxygenCommands[commandWordLines[i]] = COMMANDWORDLINE;
94 
95   for (int i = 0; i < commandWordOWordOWordsSize; i++)
96     doxygenCommands[commandWordOWordOWords[i]] = COMMANDWORDOWORDWORD;
97 
98   for (int i = 0; i < commandOWordsSize; i++)
99     doxygenCommands[commandOWords[i]] = COMMANDOWORD;
100 
101   for (int i = 0; i < commandErrorThrowingsSize; i++)
102     doxygenCommands[commandErrorThrowings[i]] = COMMANDERRORTHROW;
103 
104   for (int i = 0; i < commandUniquesSize; i++)
105     doxygenCommands[commandUniques[i]] = COMMANDUNIQUE;
106 
107   for (int i = 0; i < commandHtmlSize; i++)
108     doxygenCommands[commandHtml[i]] = COMMAND_HTML;
109 
110   for (int i = 0; i < commandHtmlEntitiesSize; i++)
111     doxygenCommands[commandHtmlEntities[i]] = COMMAND_HTML_ENTITY;
112 
113   // fill section indicators command set
114   for (int i = 0; i < sectionIndicatorsSize; i++)
115     doxygenSectionIndicators.insert(sectionIndicators[i]);
116 }
117 
stringToLower(const std::string & stringToConvert)118 std::string DoxygenParser::stringToLower(const std::string &stringToConvert) {
119 
120   string result(stringToConvert.size(), ' ');
121 
122   for (size_t i = 0; i < result.size(); i++) {
123     result[i] = tolower(stringToConvert[i]);
124   }
125 
126   return result;
127 }
128 
isSectionIndicator(const std::string & smallString)129 bool DoxygenParser::isSectionIndicator(const std::string &smallString) {
130 
131   std::set<std::string>::iterator it = doxygenSectionIndicators.find(stringToLower(smallString));
132 
133   return it != doxygenSectionIndicators.end();
134 }
135 
printTree(const DoxygenEntityList & rootList)136 void DoxygenParser::printTree(const DoxygenEntityList &rootList) {
137   DoxygenEntityList::const_iterator p = rootList.begin();
138   while (p != rootList.end()) {
139     (*p).printEntity(0);
140     p++;
141   }
142 }
143 
commandBelongs(const std::string & theCommand)144 DoxygenParser::DoxyCommandEnum DoxygenParser::commandBelongs(const std::string &theCommand) {
145   DoxyCommandsMapIt it = doxygenCommands.find(stringToLower(getBaseCommand(theCommand)));
146 
147   if (it != doxygenCommands.end()) {
148     return it->second;
149   }
150   // Check if this command is defined as an alias.
151   if (Getattr(m_node, ("feature:doxygen:alias:" + theCommand).c_str())) {
152     return COMMAND_ALIAS;
153   }
154   // Check if this command should be ignored.
155   if (String *const ignore = getIgnoreFeature(theCommand)) {
156     // Check that no value is specified for this feature ("1" is the implicit
157     // one given to it by SWIG itself), we may use the value in the future, but
158     // for now we only use the attributes.
159     if (Strcmp(ignore, "1") != 0) {
160       Swig_warning(WARN_PP_UNEXPECTED_TOKENS, m_fileName.c_str(), m_fileLineNo,
161                    "Feature \"doxygen:ignore\" value ignored for Doxygen command \"%s\".\n", theCommand.c_str());
162     }
163     // Also ensure that the matching end command, if any, will be recognized.
164     const string endCommand = getIgnoreFeatureEndCommand(theCommand);
165     if (!endCommand.empty()) {
166       Setattr(m_node, ("feature:doxygen:ignore:" + endCommand).c_str(), NewString("1"));
167     }
168 
169     return COMMAND_IGNORE;
170   }
171 
172   return NONE;
173 }
174 
trim(const std::string & text)175 std::string DoxygenParser::trim(const std::string &text) {
176   size_t start = text.find_first_not_of(" \t");
177   size_t end = text.find_last_not_of(" \t");
178 
179   if (start == string::npos || start > end) {
180     return "";
181   }
182   return text.substr(start, end - start + 1);
183 }
184 
isEndOfLine()185 bool DoxygenParser::isEndOfLine() {
186   if (m_tokenListIt == m_tokenList.end()) {
187     return false;
188   }
189   Token nextToken = *m_tokenListIt;
190   return nextToken.m_tokenType == END_LINE;
191 }
192 
skipWhitespaceTokens()193 void DoxygenParser::skipWhitespaceTokens() {
194   if (m_tokenListIt == m_tokenList.end()) {
195     return;
196   }
197 
198   while (m_tokenListIt != m_tokenList.end()
199          && (m_tokenListIt->m_tokenType == END_LINE || trim(m_tokenListIt->m_tokenString).empty())) {
200 
201     m_tokenListIt++;
202   }
203 }
204 
getNextToken()205 std::string DoxygenParser::getNextToken() {
206 
207   if (m_tokenListIt == m_tokenList.end()) {
208     return "";
209   }
210 
211   if (m_tokenListIt->m_tokenType == PLAINSTRING) {
212     return (m_tokenListIt++)->m_tokenString;
213   }
214 
215   return "";
216 }
217 
getNextWord()218 std::string DoxygenParser::getNextWord() {
219 
220   /*    if (m_tokenListIt == m_tokenList.end()) {
221      return "";
222      }
223    */
224   while (m_tokenListIt != m_tokenList.end()
225          && (m_tokenListIt->m_tokenType == PLAINSTRING)) {
226     // handle quoted strings as words
227     string token = m_tokenListIt->m_tokenString;
228     if (token == "\"") {
229 
230       string word = m_tokenListIt->m_tokenString;
231       m_tokenListIt++;
232       while (true) {
233         string nextWord = getNextToken();
234         if (nextWord.empty()) { // maybe report unterminated string error
235           return word;
236         }
237         word += nextWord;
238         if (nextWord == "\"") {
239           return word;
240         }
241       }
242     }
243 
244     string tokenStr = trim(m_tokenListIt->m_tokenString);
245     m_tokenListIt++;
246     if (!tokenStr.empty()) {
247       return tokenStr;
248     }
249   }
250 
251   return "";
252 }
253 
getOneLine(const TokenList & tokList)254 DoxygenParser::TokenListCIt DoxygenParser::getOneLine(const TokenList &tokList) {
255 
256   TokenListCIt endOfLineIt = m_tokenListIt;
257 
258   while (endOfLineIt != tokList.end()) {
259     if (endOfLineIt->m_tokenType == END_LINE) {
260       return endOfLineIt;
261     }
262     endOfLineIt++;
263   }
264 
265   return tokList.end();
266 }
267 
getStringTilCommand(const TokenList & tokList)268 std::string DoxygenParser::getStringTilCommand(const TokenList &tokList) {
269 
270   if (m_tokenListIt == tokList.end()) {
271     return "";
272   }
273 
274   string description;
275 
276   while (m_tokenListIt->m_tokenType == PLAINSTRING) {
277     const Token &currentToken = *m_tokenListIt++;
278     if (currentToken.m_tokenType == PLAINSTRING) {
279       description = description + currentToken.m_tokenString; // + " ";
280     }
281   }
282   return description;
283 }
284 
getStringTilEndCommand(const std::string & theCommand,const TokenList & tokList)285 std::string DoxygenParser::getStringTilEndCommand(const std::string &theCommand, const TokenList &tokList) {
286 
287   if (m_tokenListIt == tokList.end()) {
288     return "";
289   }
290 
291   string description;
292   while (m_tokenListIt != tokList.end()) {
293 
294     if (m_tokenListIt->m_tokenType == PLAINSTRING) {
295       description += m_tokenListIt->m_tokenString;
296     } else if (m_tokenListIt->m_tokenType == END_LINE) {
297       description += "\n";
298     } else if (m_tokenListIt->m_tokenString == theCommand) {
299       m_tokenListIt++;
300       return description;
301     }
302 
303     m_tokenListIt++;
304   }
305 
306   printListError(WARN_DOXYGEN_COMMAND_EXPECTED, "Expected Doxygen command: " + theCommand + ".");
307 
308   return description;
309 }
310 
getEndOfParagraph(const TokenList & tokList)311 DoxygenParser::TokenListCIt DoxygenParser::getEndOfParagraph(const TokenList &tokList) {
312 
313   TokenListCIt endOfParagraph = m_tokenListIt;
314 
315   while (endOfParagraph != tokList.end()) {
316     // If \code or \verbatim is encountered within a paragraph, then
317     // go all the way to the end of that command, since the content
318     // could contain empty lines that would appear to be paragraph
319     // ends:
320     if (endOfParagraph->m_tokenType == COMMAND &&
321 	(endOfParagraph->m_tokenString == "code" ||
322 	 endOfParagraph->m_tokenString == "verbatim")) {
323       const string theCommand = endOfParagraph->m_tokenString;
324       endOfParagraph = getEndCommand("end" + theCommand, tokList);
325       endOfParagraph++; // Move after the end command
326       return endOfParagraph;
327     }
328     if (endOfParagraph->m_tokenType == END_LINE) {
329       endOfParagraph++;
330       if (endOfParagraph != tokList.end()
331           && endOfParagraph->m_tokenType == END_LINE) {
332         endOfParagraph++;
333         //cout << "ENCOUNTERED END OF PARA" << endl;
334         return endOfParagraph;
335       }
336 
337     } else if (endOfParagraph->m_tokenType == COMMAND) {
338 
339       if (isSectionIndicator(getBaseCommand(endOfParagraph->m_tokenString))) {
340         return endOfParagraph;
341       } else {
342         endOfParagraph++;
343       }
344 
345     } else if (endOfParagraph->m_tokenType == PLAINSTRING) {
346       endOfParagraph++;
347     } else {
348       return tokList.end();
349     }
350   }
351 
352   return tokList.end();
353 }
354 
getEndOfSection(const std::string & theCommand,const TokenList & tokList)355 DoxygenParser::TokenListCIt DoxygenParser::getEndOfSection(const std::string &theCommand, const TokenList &tokList) {
356 
357   TokenListCIt endOfParagraph = m_tokenListIt;
358 
359   while (endOfParagraph != tokList.end()) {
360     if (endOfParagraph->m_tokenType == COMMAND) {
361       if (theCommand == endOfParagraph->m_tokenString)
362         return endOfParagraph;
363       else
364         endOfParagraph++;
365     } else if (endOfParagraph->m_tokenType == PLAINSTRING) {
366       endOfParagraph++;
367     } else if (endOfParagraph->m_tokenType == END_LINE) {
368       endOfParagraph++;
369       if (endOfParagraph->m_tokenType == END_LINE) {
370         endOfParagraph++;
371         return endOfParagraph;
372       }
373     }
374   }
375   return tokList.end();
376 }
377 
getEndCommand(const std::string & theCommand,const TokenList & tokList)378 DoxygenParser::TokenListCIt DoxygenParser::getEndCommand(const std::string &theCommand, const TokenList &tokList) {
379 
380   TokenListCIt endOfCommand = m_tokenListIt;
381 
382   while (endOfCommand != tokList.end()) {
383     endOfCommand++;
384     if ((*endOfCommand).m_tokenType == COMMAND) {
385       if (theCommand == (*endOfCommand).m_tokenString) {
386         return endOfCommand;
387       }
388     }
389   }
390   //End command not found
391   return tokList.end();
392 }
393 
skipEndOfLine()394 void DoxygenParser::skipEndOfLine() {
395   if (m_tokenListIt != m_tokenList.end()
396       && m_tokenListIt->m_tokenType == END_LINE) {
397     m_tokenListIt++;
398   }
399 }
400 
addSimpleCommand(const std::string & theCommand,DoxygenEntityList & doxyList)401 void DoxygenParser::addSimpleCommand(const std::string &theCommand, DoxygenEntityList &doxyList) {
402   if (noisy)
403     cout << "Parsing " << theCommand << endl;
404 
405   doxyList.push_back(DoxygenEntity(theCommand));
406 }
407 
addCommandWord(const std::string & theCommand,const TokenList &,DoxygenEntityList & doxyList)408 void DoxygenParser::addCommandWord(const std::string &theCommand, const TokenList &, DoxygenEntityList &doxyList) {
409   if (noisy)
410     cout << "Parsing " << theCommand << endl;
411 
412   if (isEndOfLine()) {
413     // handles cases when command is at the end of line (for example "\c\nreally"
414     skipWhitespaceTokens();
415     doxyList.push_back(DoxygenEntity("plainstd::endl"));
416   }
417   std::string name = getNextWord();
418   if (!name.empty()) {
419     DoxygenEntityList aNewList;
420     aNewList.push_back(DoxygenEntity("plainstd::string", name));
421     doxyList.push_back(DoxygenEntity(theCommand, aNewList));
422   } else {
423     printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No word followed the command. Command ignored.");
424   }
425 }
426 
addCommandLine(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList & doxyList)427 void DoxygenParser::addCommandLine(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList) {
428   if (noisy)
429     cout << "Parsing " << theCommand << endl;
430   TokenListCIt endOfLine = getOneLine(tokList);
431   DoxygenEntityList aNewList = parse(endOfLine, tokList);
432   doxyList.push_back(DoxygenEntity(theCommand, aNewList));
433   skipEndOfLine();
434 }
435 
addCommandParagraph(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList & doxyList)436 void DoxygenParser::addCommandParagraph(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList) {
437   if (noisy)
438     cout << "Parsing " << theCommand << endl;
439 
440   TokenListCIt endOfParagraph = getEndOfParagraph(tokList);
441   DoxygenEntityList aNewList;
442   aNewList = parse(endOfParagraph, tokList);
443   doxyList.push_back(DoxygenEntity(theCommand, aNewList));
444 }
445 
addCommandEndCommand(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList & doxyList)446 void DoxygenParser::addCommandEndCommand(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList) {
447   if (noisy)
448     cout << "Parsing " << theCommand << endl;
449   TokenListCIt endCommand = getEndCommand("end" + theCommand, tokList);
450   if (endCommand == tokList.end()) {
451     printListError(WARN_DOXYGEN_COMMAND_EXPECTED, "Expected Doxygen command: end" + theCommand + ".");
452     return;
453   }
454   DoxygenEntityList aNewList;
455   aNewList = parse(endCommand, tokList);
456   m_tokenListIt++;
457   doxyList.push_back(DoxygenEntity(theCommand, aNewList));
458 }
459 
addCommandWordParagraph(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList & doxyList)460 void DoxygenParser::addCommandWordParagraph(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList) {
461   if (noisy)
462     cout << "Parsing " << theCommand << endl;
463 
464   std::string name = getNextWord();
465 
466   if (name.empty()) {
467     printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No word followed the command. Command ignored.");
468     return;
469   }
470   TokenListCIt endOfParagraph = getEndOfParagraph(tokList);
471   DoxygenEntityList aNewList;
472   aNewList = parse(endOfParagraph, tokList);
473   aNewList.push_front(DoxygenEntity("plainstd::string", name));
474   doxyList.push_back(DoxygenEntity(theCommand, aNewList));
475 }
476 
addCommandWordLine(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList & doxyList)477 void DoxygenParser::addCommandWordLine(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList) {
478   if (noisy)
479     cout << "Parsing " << theCommand << endl;
480   std::string name = getNextWord();
481   if (name.empty()) {
482     printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No word followed the command. Command ignored.");
483     return;
484   }
485 
486   TokenListCIt endOfLine = getOneLine(tokList);
487   DoxygenEntityList aNewList;
488   aNewList = parse(endOfLine, tokList);
489   aNewList.push_front(DoxygenEntity("plainstd::string", name));
490   doxyList.push_back(DoxygenEntity(theCommand, aNewList));
491   //else cout << "No line followed " << theCommand <<  " command. Not added" << endl;
492 }
493 
addCommandWordOWordOWord(const std::string & theCommand,const TokenList &,DoxygenEntityList & doxyList)494 void DoxygenParser::addCommandWordOWordOWord(const std::string &theCommand, const TokenList &, DoxygenEntityList &doxyList) {
495   if (noisy)
496     cout << "Parsing " << theCommand << endl;
497 
498   std::string name = getNextWord();
499   if (name.empty()) {
500     printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No word followed the command. Command ignored.");
501     return;
502   }
503   std::string headerfile = getNextWord();
504   std::string headername = getNextWord();
505   DoxygenEntityList aNewList;
506   aNewList.push_back(DoxygenEntity("plainstd::string", name));
507   if (!headerfile.empty())
508     aNewList.push_back(DoxygenEntity("plainstd::string", headerfile));
509   if (!headername.empty())
510     aNewList.push_back(DoxygenEntity("plainstd::string", headername));
511   doxyList.push_back(DoxygenEntity(theCommand, aNewList));
512 }
513 
addCommandOWord(const std::string & theCommand,const TokenList &,DoxygenEntityList & doxyList)514 void DoxygenParser::addCommandOWord(const std::string &theCommand, const TokenList &, DoxygenEntityList &doxyList) {
515   if (noisy)
516     cout << "Parsing " << theCommand << endl;
517 
518   std::string name = getNextWord();
519   DoxygenEntityList aNewList;
520   aNewList.push_back(DoxygenEntity("plainstd::string", name));
521   doxyList.push_back(DoxygenEntity(theCommand, aNewList));
522 }
523 
addCommandErrorThrow(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList &)524 void DoxygenParser::addCommandErrorThrow(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &) {
525 
526   printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": Unexpectedly encountered this command.");
527   m_tokenListIt = getOneLine(tokList);
528 }
529 
addCommandHtml(const std::string & theCommand,const TokenList &,DoxygenEntityList & doxyList)530 void DoxygenParser::addCommandHtml(const std::string &theCommand, const TokenList &, DoxygenEntityList &doxyList) {
531   if (noisy)
532     cout << "Parsing " << theCommand << endl;
533 
534   std::string htmlTagArgs = getNextToken();
535   doxyList.push_back(DoxygenEntity(theCommand, htmlTagArgs));
536 }
537 
addCommandHtmlEntity(const std::string & theCommand,const TokenList &,DoxygenEntityList & doxyList)538 void DoxygenParser::addCommandHtmlEntity(const std::string &theCommand, const TokenList &, DoxygenEntityList &doxyList) {
539   if (noisy)
540     cout << "Parsing " << theCommand << endl;
541 
542   DoxygenEntityList aNewList;
543   doxyList.push_back(DoxygenEntity(theCommand, aNewList));
544 }
545 
addCommandUnique(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList & doxyList)546 void DoxygenParser::addCommandUnique(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList) {
547 
548   static std::map<std::string, std::string> endCommands;
549   DoxygenEntityList aNewList;
550   if (theCommand == "arg" || theCommand == "li") {
551     TokenListCIt endOfSection = getEndOfSection(theCommand, tokList);
552     DoxygenEntityList aNewList;
553     aNewList = parse(endOfSection, tokList);
554     doxyList.push_back(DoxygenEntity(theCommand, aNewList));
555   }
556   // \xrefitem <key> "(heading)" "(std::list title)" {text}
557   else if (theCommand == "xrefitem") {
558     if (noisy)
559       cout << "Parsing " << theCommand << endl;
560     std::string key = getNextWord();
561     if (key.empty()) {
562       printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No key followed the command. Command ignored.");
563       return;
564     }
565     std::string heading = getNextWord();
566     if (key.empty()) {
567       printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No heading followed the command. Command ignored.");
568       return;
569     }
570     std::string title = getNextWord();
571     if (title.empty()) {
572       printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No title followed the command. Command ignored.");
573       return;
574     }
575     TokenListCIt endOfParagraph = getEndOfParagraph(tokList);
576     aNewList = parse(endOfParagraph, tokList);
577     aNewList.push_front(DoxygenEntity("plainstd::string", title));
578     aNewList.push_front(DoxygenEntity("plainstd::string", heading));
579     aNewList.push_front(DoxygenEntity("plainstd::string", key));
580     doxyList.push_back(DoxygenEntity(theCommand, aNewList));
581   }
582   // \ingroup (<groupname> [<groupname> <groupname>])
583   else if (theCommand == "ingroup") {
584     std::string name = getNextWord();
585     aNewList.push_back(DoxygenEntity("plainstd::string", name));
586     name = getNextWord();
587     if (!name.empty())
588       aNewList.push_back(DoxygenEntity("plainstd::string", name));
589     name = getNextWord();
590     if (!name.empty())
591       aNewList.push_back(DoxygenEntity("plainstd::string", name));
592     doxyList.push_back(DoxygenEntity(theCommand, aNewList));
593   }
594   // \par [(paragraph title)] { paragraph }
595   else if (theCommand == "par") {
596     TokenListCIt endOfLine = getOneLine(tokList);
597     aNewList = parse(endOfLine, tokList);
598     DoxygenEntityList aNewList2;
599     TokenListCIt endOfParagraph = getEndOfParagraph(tokList);
600     aNewList2 = parse(endOfParagraph, tokList);
601     aNewList.splice(aNewList.end(), aNewList2);
602     doxyList.push_back(DoxygenEntity(theCommand, aNewList));
603   }
604   // \headerfile <header-file> [<header-name>]
605   else if (theCommand == "headerfile") {
606     DoxygenEntityList aNewList;
607     std::string name = getNextWord();
608     aNewList.push_back(DoxygenEntity("plainstd::string", name));
609     name = getNextWord();
610     if (!name.empty())
611       aNewList.push_back(DoxygenEntity("plainstd::string", name));
612     doxyList.push_back(DoxygenEntity(theCommand, aNewList));
613   }
614   // \overload [(function declaration)]
615   else if (theCommand == "overload") {
616     TokenListCIt endOfLine = getOneLine(tokList);
617     if (endOfLine != m_tokenListIt) {
618       DoxygenEntityList aNewList;
619       aNewList = parse(endOfLine, tokList);
620       doxyList.push_back(DoxygenEntity(theCommand, aNewList));
621     } else
622       doxyList.push_back(DoxygenEntity(theCommand));
623   }
624   // \weakgroup <name> [(title)]
625   else if (theCommand == "weakgroup") {
626     if (noisy)
627       cout << "Parsing " << theCommand << endl;
628     std::string name = getNextWord();
629     if (name.empty()) {
630       printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No word followed the command. Command ignored.");
631       return;
632     }
633     DoxygenEntityList aNewList;
634     TokenListCIt endOfLine = getOneLine(tokList);
635     if (endOfLine != m_tokenListIt) {
636       aNewList = parse(endOfLine, tokList);
637     }
638     aNewList.push_front(DoxygenEntity("plainstd::string", name));
639     doxyList.push_back(DoxygenEntity(theCommand, aNewList));
640   }
641   // \ref <name> ["(text)"]
642   else if (theCommand == "ref") {
643     if (noisy)
644       cout << "Parsing " << theCommand << endl;
645     std::string name = getNextWord();
646     if (name.empty()) {
647       printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No key followed the command. Command ignored.");
648       return;
649     }
650     DoxygenEntityList aNewList;
651     aNewList.push_front(DoxygenEntity("plainstd::string", name));
652     // TokenListCIt endOfLine = getOneLine(tokList);
653     // if (endOfLine != m_tokenListIt) {
654     //   aNewList = parse(endOfLine, tokList);
655     //}
656     TokenListCIt tmpIt = m_tokenListIt;
657     std::string refTitle = getNextWord();
658     // If title is following the ref tag, it must be quoted. Otherwise
659     // doxy puts link on ref id.
660     if (refTitle.size() > 1 && refTitle[0] == '"') {
661       // remove quotes
662       refTitle = refTitle.substr(1, refTitle.size() - 2);
663       aNewList.push_back(DoxygenEntity("plainstd::string", refTitle));
664     } else {
665       // no quoted string is following, so we have to restore iterator
666       m_tokenListIt = tmpIt;
667     }
668     doxyList.push_back(DoxygenEntity(theCommand, aNewList));
669   }
670   // \subpage <name> ["(text)"]
671   else if (theCommand == "subpage") {
672     if (noisy)
673       cout << "Parsing " << theCommand << endl;
674     std::string name = getNextWord();
675     if (name.empty()) {
676       printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No name followed the command. Command ignored.");
677       return;
678     }
679     std::string text = getNextWord();
680     aNewList.push_back(DoxygenEntity("plainstd::string", name));
681     if (!text.empty())
682       aNewList.push_back(DoxygenEntity("plainstd::string", text));
683     doxyList.push_back(DoxygenEntity(theCommand, aNewList));
684   }
685   // \code ... \endcode
686   // \verbatim ... \endverbatim
687   // \dot dotcode \enddot
688   // \msc msccode \endmsc
689   // \f[ ... \f]
690   // \f{ ... \f}
691   // \f{env}{ ... \f}
692   // \f$ ... \f$
693   else if (getBaseCommand(theCommand) == "code" || theCommand == "verbatim"
694            || theCommand == "dot" || theCommand == "msc" || theCommand == "f[" || theCommand == "f{" || theCommand == "f$") {
695     if (!endCommands.size()) {
696       // fill in static table of end commands
697       endCommands["f["] = "f]";
698       endCommands["f{"] = "f}";
699       endCommands["f$"] = "f$";
700     }
701     if (noisy)
702       cout << "Parsing " << theCommand << endl;
703 
704     std::string endCommand;
705     std::map<std::string, std::string>::iterator it;
706     it = endCommands.find(theCommand);
707     if (it != endCommands.end())
708       endCommand = it->second;
709     else
710       endCommand = "end" + getBaseCommand(theCommand);
711 
712     std::string content = getStringTilEndCommand(endCommand, tokList);
713     aNewList.push_back(DoxygenEntity("plainstd::string", content));
714     doxyList.push_back(DoxygenEntity(theCommand, aNewList));
715   }
716   // \dotfile <file> ["caption"]
717   // \mscfile <file> ["caption"]
718   else if (theCommand == "dotfile" || theCommand == "mscfile") {
719     if (noisy)
720       cout << "Parsing " << theCommand << endl;
721     std::string file = getNextWord();
722     if (file.empty()) {
723       printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No file followed the command. Command ignored.");
724       return;
725     }
726     std::string caption = getNextWord();
727     aNewList.push_back(DoxygenEntity("plainstd::string", file));
728     if (!caption.empty())
729       aNewList.push_back(DoxygenEntity("plainstd::string", caption));
730     doxyList.push_back(DoxygenEntity(theCommand, aNewList));
731   }
732   // \image <format> <file> ["caption"] [<sizeindication>=<size>]
733   else if (theCommand == "image") {
734     if (noisy)
735       cout << "Parsing " << theCommand << endl;
736     std::string format = getNextWord();
737     if (format.empty()) {
738       printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No format followed the command. Command ignored.");
739       return;
740     }
741     std::string file = getNextWord();
742     if (file.empty()) {
743       printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No name followed the command. Command ignored.");
744       return;
745     }
746     std::string caption = getNextWord();
747     std::string size = getNextWord();
748 
749     DoxygenEntityList aNewList;
750     aNewList.push_back(DoxygenEntity("plainstd::string", format));
751     aNewList.push_back(DoxygenEntity("plainstd::string", file));
752     if (!caption.empty())
753       aNewList.push_back(DoxygenEntity("plainstd::string", caption));
754     if (!size.empty())
755       aNewList.push_back(DoxygenEntity("plainstd::string", size));
756     doxyList.push_back(DoxygenEntity(theCommand, aNewList));
757   }
758   // \addtogroup <name> [(title)]
759   else if (theCommand == "addtogroup") {
760     if (noisy)
761       cout << "Parsing " << theCommand << endl;
762     std::string name = getNextWord();
763     if (name.empty()) {
764       printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": There should be at least one word following the command. Command ignored.");
765       return;
766     }
767     DoxygenEntityList aNewList;
768     TokenListCIt endOfLine = getOneLine(tokList);
769     if (endOfLine != m_tokenListIt) {
770       aNewList = parse(endOfLine, tokList);
771     }
772     aNewList.push_front(DoxygenEntity("plainstd::string", name));
773     doxyList.push_back(DoxygenEntity(theCommand, aNewList));
774     skipEndOfLine();
775   }
776   // \if <cond> [\else ...] [\elseif <cond> ...] \endif
777   else if (theCommand == "if" || theCommand == "ifnot" || theCommand == "else" || theCommand == "elseif") {
778     if (noisy)
779       cout << "Parsing " << theCommand << endl;
780 
781     std::string cond;
782     bool skipEndif = false; // if true then we skip endif after parsing block of code
783     bool needsCond = (theCommand == "if" || theCommand == "ifnot" || theCommand == "elseif");
784     if (needsCond) {
785       cond = getNextWord();
786       if (cond.empty()) {
787         printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No word followed the command. Command ignored.");
788         return;
789       }
790     }
791 
792     int nestedCounter = 1;
793     TokenListCIt endCommand = tokList.end();
794 
795     // go through the commands and find closing endif or else or elseif
796     for (TokenListCIt it = m_tokenListIt; it != tokList.end(); it++) {
797       if (it->m_tokenType == COMMAND) {
798         if (it->m_tokenString == "if" || it->m_tokenString == "ifnot")
799           nestedCounter++;
800         else if (it->m_tokenString == "endif")
801           nestedCounter--;
802         if (nestedCounter == 1 && (it->m_tokenString == "else" || it->m_tokenString == "elseif")) { // else found
803           endCommand = it;
804           break;
805         }
806         if (nestedCounter == 0) { // endif found
807           endCommand = it;
808           skipEndif = true;
809           break;
810         }
811       }
812     }
813 
814     if (endCommand == tokList.end()) {
815       printListError(WARN_DOXYGEN_COMMAND_EXPECTED, "Expected Doxygen command: endif.");
816       return;
817     }
818 
819     DoxygenEntityList aNewList;
820     aNewList = parse(endCommand, tokList);
821     if (skipEndif)
822       m_tokenListIt++;
823     if (needsCond)
824       aNewList.push_front(DoxygenEntity("plainstd::string", cond));
825     doxyList.push_back(DoxygenEntity(theCommand, aNewList));
826   }
827 }
828 
aliasCommand(const std::string & theCommand,const TokenList &,DoxygenEntityList & doxyList)829 void DoxygenParser::aliasCommand(const std::string &theCommand, const TokenList &/* tokList */ , DoxygenEntityList &doxyList) {
830   String *const alias = Getattr(m_node, ("feature:doxygen:alias:" + theCommand).c_str());
831   if (!alias)
832     return;
833 
834   doxyList.push_back(DoxygenEntity("plainstd::string", Char(alias)));
835 }
836 
getIgnoreFeature(const std::string & theCommand,const char * argument) const837 String *DoxygenParser::getIgnoreFeature(const std::string &theCommand, const char *argument) const {
838   string feature_name = "feature:doxygen:ignore:" + theCommand;
839   if (argument) {
840     feature_name += ':';
841     feature_name += argument;
842   }
843 
844   return Getattr(m_node, feature_name.c_str());
845 }
846 
getIgnoreFeatureEndCommand(const std::string & theCommand) const847 string DoxygenParser::getIgnoreFeatureEndCommand(const std::string &theCommand) const {
848   // We may be dealing either with a simple command or with the starting command
849   // of a block, as indicated by the value of "range" starting with "end".
850   string endCommand;
851   if (String *const range = getIgnoreFeature(theCommand, "range")) {
852     const char *const p = Char(range);
853     if (strncmp(p, "end", 3) == 0) {
854       if (p[3] == ':') {
855         // Normally the end command name follows after the colon.
856         endCommand = p + 4;
857       } else if (p[3] == '\0') {
858         // But it may be omitted in which case the default Doxygen convention of
859         // using "something"/"endsomething" is used.
860         endCommand = "end" + theCommand;
861       }
862     }
863   }
864 
865   return endCommand;
866 }
867 
ignoreCommand(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList & doxyList)868 void DoxygenParser::ignoreCommand(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList) {
869   const string endCommand = getIgnoreFeatureEndCommand(theCommand);
870   if (!endCommand.empty()) {
871     TokenListCIt itEnd = getEndCommand(endCommand, tokList);
872     if (itEnd == tokList.end()) {
873       printListError(WARN_DOXYGEN_COMMAND_EXPECTED, "Expected Doxygen command: " + endCommand + ".");
874       return;
875     }
876     // If we ignore the command, also ignore any whitespace preceding it as we
877     // want to avoid having lines consisting of whitespace only or trailing
878     // whitespace in general (at least Python, with its pep8 tool, really
879     // doesn't like it).
880     if (!doxyList.empty()) {
881       DoxygenEntityList::iterator i = doxyList.end();
882       --i;
883       if (i->typeOfEntity == "plainstd::string" && i->data.find_first_not_of(" \t") == std::string::npos) {
884         doxyList.erase(i);
885       }
886     }
887     // Determine what to do with the part of the comment between the start and
888     // end commands: by default, we simply throw it away, but "contents"
889     // attribute may be used to change this.
890     if (String *const contents = getIgnoreFeature(theCommand, "contents")) {
891       // Currently only "parse" is supported but we may need to add "copy" to
892       // handle custom tags which contain text that is supposed to be copied
893       // verbatim in the future.
894       if (Strcmp(contents, "parse") == 0) {
895         DoxygenEntityList aNewList = parse(itEnd, tokList);
896         doxyList.splice(doxyList.end(), aNewList);
897       } else {
898         Swig_error(m_fileName.c_str(), m_fileLineNo, "Invalid \"doxygen:ignore\" feature \"contents\" attribute \"%s\".\n", Char(contents));
899         return;
900       }
901     }
902 
903     m_tokenListIt = itEnd;
904     m_tokenListIt++;
905   } else if (String *const range = getIgnoreFeature(theCommand, "range")) {
906     // Currently we only support "line" but, in principle, we should also
907     // support "word" and "paragraph" for consistency with the built-in Doxygen
908     // commands which can have either of these three ranges (which are indicated
909     // using <word-arg>, (line-arg) and {para-arg} respectively in Doxygen
910     // documentation).
911     if (Strcmp(range, "line") == 0) {
912       // Consume everything until the end of line.
913       m_tokenListIt = getOneLine(tokList);
914       skipEndOfLine();
915     } else {
916       Swig_error(m_fileName.c_str(), m_fileLineNo, "Invalid \"doxygen:ignore\" feature \"range\" attribute \"%s\".\n", Char(range));
917       return;
918     }
919   }
920 }
921 
addCommand(const std::string & commandString,const TokenList & tokList,DoxygenEntityList & doxyList)922 void DoxygenParser::addCommand(const std::string &commandString, const TokenList &tokList, DoxygenEntityList &doxyList) {
923 
924   string theCommand = stringToLower(commandString);
925 
926   if (theCommand == "plainstd::string") {
927     string nextPhrase = getStringTilCommand(tokList);
928     if (noisy)
929       cout << "Parsing plain std::string :" << nextPhrase << endl;
930     doxyList.push_back(DoxygenEntity("plainstd::string", nextPhrase));
931     return;
932   }
933 
934   switch (commandBelongs(commandString)) {
935   case SIMPLECOMMAND:
936     addSimpleCommand(theCommand, doxyList);
937     break;
938   case COMMANDWORD:
939     addCommandWord(theCommand, tokList, doxyList);
940     break;
941   case COMMANDLINE:
942     addCommandLine(theCommand, tokList, doxyList);
943     break;
944   case COMMANDPARAGRAPH:
945     addCommandParagraph(theCommand, tokList, doxyList);
946     break;
947   case COMMANDENDCOMMAND:
948     addCommandEndCommand(theCommand, tokList, doxyList);
949     break;
950   case COMMANDWORDPARAGRAPH:
951     addCommandWordParagraph(theCommand, tokList, doxyList);
952     break;
953   case COMMANDWORDLINE:
954     addCommandWordLine(theCommand, tokList, doxyList);
955     break;
956   case COMMANDWORDOWORDWORD:
957     addCommandWordOWordOWord(theCommand, tokList, doxyList);
958     break;
959   case COMMANDOWORD:
960     addCommandOWord(theCommand, tokList, doxyList);
961     break;
962   case COMMANDERRORTHROW:
963     addCommandErrorThrow(theCommand, tokList, doxyList);
964     break;
965   case COMMANDUNIQUE:
966     addCommandUnique(theCommand, tokList, doxyList);
967     break;
968   case COMMAND_HTML:
969     addCommandHtml(theCommand, tokList, doxyList);
970     break;
971   case COMMAND_HTML_ENTITY:
972     addCommandHtmlEntity(theCommand, tokList, doxyList);
973     break;
974   case COMMAND_ALIAS:
975     aliasCommand(commandString, tokList, doxyList);
976     break;
977   case COMMAND_IGNORE:
978     ignoreCommand(commandString, tokList, doxyList);
979     break;
980   case NONE:
981   case END_LINE:
982   case PARAGRAPH_END:
983   case PLAINSTRING:
984   case COMMAND:
985     // TODO: Ensure that these values either are correctly ignored here or can't happen.
986     break;
987   }
988 }
989 
990 /**
991  * This method converts TokenList to DoxygenEntryList.
992  */
parse(TokenListCIt endParsingIndex,const TokenList & tokList,bool root)993 DoxygenEntityList DoxygenParser::parse(TokenListCIt endParsingIndex, const TokenList &tokList, bool root) {
994   // if we are root, than any strings should be added as 'partofdescription', else as 'plainstd::string'
995   std::string currPlainstringCommandType = root ? "partofdescription" : "plainstd::string";
996   DoxygenEntityList aNewList;
997 
998   // Less than check (instead of not equal) is a safeguard in case the
999   // iterator is incremented past the end
1000   while (m_tokenListIt < endParsingIndex) {
1001 
1002     Token currToken = *m_tokenListIt;
1003 
1004     if (noisy)
1005       cout << "Parsing for phrase starting in:" << currToken.toString() << endl;
1006 
1007     if (currToken.m_tokenType == END_LINE) {
1008       aNewList.push_back(DoxygenEntity("plainstd::endl"));
1009       m_tokenListIt++;
1010     } else if (currToken.m_tokenType == COMMAND) {
1011       m_tokenListIt++;
1012       addCommand(currToken.m_tokenString, tokList, aNewList);
1013     } else if (currToken.m_tokenType == PLAINSTRING) {
1014       addCommand(currPlainstringCommandType, tokList, aNewList);
1015     }
1016 
1017     // If addCommand above misbehaves, it can move the iterator past endParsingIndex
1018     if (m_tokenListIt > endParsingIndex)
1019       printListError(WARN_DOXYGEN_UNEXPECTED_ITERATOR_VALUE, "Unexpected iterator value in DoxygenParser::parse");
1020 
1021     if (endParsingIndex != tokList.end() && m_tokenListIt == tokList.end()) {
1022       // this could happen if we can't reach the original endParsingIndex
1023       printListError(WARN_DOXYGEN_UNEXPECTED_END_OF_COMMENT, "Unexpected end of Doxygen comment encountered.");
1024       break;
1025     }
1026   }
1027   return aNewList;
1028 }
1029 
createTree(Node * node,String * documentation)1030 DoxygenEntityList DoxygenParser::createTree(Node *node, String *documentation) {
1031   m_node = node;
1032 
1033   tokenizeDoxygenComment(Char(documentation), Char(Getfile(documentation)), Getline(documentation));
1034 
1035   if (noisy) {
1036     cout << "---TOKEN LIST---" << endl;
1037     printList();
1038   }
1039 
1040   DoxygenEntityList rootList = parse(m_tokenList.end(), m_tokenList, true);
1041 
1042   if (noisy) {
1043     cout << "PARSED LIST" << endl;
1044     printTree(rootList);
1045   }
1046   return rootList;
1047 }
1048 
1049 /*
1050  * Splits 'text' on 'separator' chars. Separator chars are not part of the
1051  * strings.
1052  */
split(const std::string & text,char separator)1053 DoxygenParser::StringVector DoxygenParser::split(const std::string &text, char separator) {
1054   StringVector lines;
1055   size_t prevPos = 0, pos = 0;
1056 
1057   while (pos < string::npos) {
1058     pos = text.find(separator, prevPos);
1059     lines.push_back(text.substr(prevPos, pos - prevPos));
1060     prevPos = pos + 1;
1061   }
1062 
1063   return lines;
1064 }
1065 
1066 /*
1067  * Returns true, if 'c' is one of doxygen comment block start
1068  * characters: *, /, or !
1069  */
isStartOfDoxyCommentChar(char c)1070 bool DoxygenParser::isStartOfDoxyCommentChar(char c) {
1071   return (strchr("*/!", c) != NULL);
1072 }
1073 
1074 /*
1075  * Adds token with Doxygen command to token list, but only if command is one of
1076  * Doxygen commands. In that case true is returned. If the command is not
1077  * recognized as a doxygen command, it is ignored and false is returned.
1078  */
addDoxyCommand(DoxygenParser::TokenList & tokList,const std::string & cmd)1079 bool DoxygenParser::addDoxyCommand(DoxygenParser::TokenList &tokList, const std::string &cmd) {
1080   if (commandBelongs(cmd) != NONE) {
1081     tokList.push_back(Token(COMMAND, cmd));
1082     return true;
1083   } else {
1084     // This function is called for the special Doxygen commands, but also for
1085     // HTML commands (or anything that looks like them, actually) and entities.
1086     // We don't recognize all of those, so just ignore them and pass them
1087     // through, but warn about unknown Doxygen commands as ignoring them will
1088     // often result in wrong output being generated.
1089     const char ch = *cmd.begin();
1090     if (ch != '<' && ch != '&') {
1091       // Before calling printListError() we must ensure that m_tokenListIt used
1092       // by it is valid.
1093       const TokenListCIt itSave = m_tokenListIt;
1094       m_tokenListIt = m_tokenList.end();
1095 
1096       printListError(WARN_DOXYGEN_UNKNOWN_COMMAND, "Unknown Doxygen command: " + cmd + ".");
1097 
1098       m_tokenListIt = itSave;
1099     }
1100   }
1101 
1102   return false;
1103 }
1104 
1105 /*
1106  * This method copies comment text to output as it is - no processing is
1107  * done, Doxygen commands are ignored. It is used for commands \verbatim,
1108  * \htmlonly, \f$, \f[, and \f{.
1109  */
processVerbatimText(size_t pos,const std::string & line)1110 size_t DoxygenParser::processVerbatimText(size_t pos, const std::string &line) {
1111   if (line[pos] == '\\' || line[pos] == '@') { // check for end commands
1112 
1113     pos++;
1114     size_t endOfWordPos = line.find_first_not_of(DOXYGEN_WORD_CHARS, pos);
1115     string cmd = line.substr(pos, endOfWordPos - pos);
1116 
1117     if (cmd == CMD_END_HTML_ONLY || cmd == CMD_END_VERBATIM || cmd == CMD_END_LATEX_1 || cmd == CMD_END_LATEX_2 || cmd == CMD_END_LATEX_3 || cmd == CMD_END_CODE) {
1118 
1119       m_isVerbatimText = false;
1120       addDoxyCommand(m_tokenList, cmd);
1121 
1122     } else {
1123 
1124       m_tokenList.push_back(Token(PLAINSTRING,
1125                                   // include '\' or '@'
1126                                   line.substr(pos - 1, endOfWordPos - pos + 1)));
1127     }
1128 
1129     pos = endOfWordPos;
1130 
1131   } else {
1132 
1133     // whitespaces are stored as plain strings
1134     size_t startOfPossibleEndCmd = line.find_first_of("\\@", pos);
1135     m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, startOfPossibleEndCmd - pos)));
1136     pos = startOfPossibleEndCmd;
1137   }
1138 
1139   return pos;
1140 }
1141 
1142 /*
1143  * Processes doxy commands for escaped characters: \$ \@ \\ \& \~ \< \> \# \% \" \. \::
1144  * Handling this separately supports documentation text like \@someText.
1145  */
processEscapedChars(size_t & pos,const std::string & line)1146 bool DoxygenParser::processEscapedChars(size_t &pos, const std::string &line) {
1147   if ((pos + 1) < line.size()) {
1148 
1149     // \ and @ with trailing whitespace or quoted get to output as plain string
1150     string whitespaces = " '\t\n";
1151     if (whitespaces.find(line[pos + 1]) != string::npos) {
1152       m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, 1)));
1153       pos++;
1154       return true;
1155     }
1156     // these chars can be escaped for doxygen
1157     string escapedChars = "$@\\&~<>#%\".";
1158     if (escapedChars.find(line[pos + 1]) != string::npos) {
1159 
1160       addDoxyCommand(m_tokenList, line.substr(pos + 1, 1));
1161       pos += 2;
1162       return true;
1163 
1164     } else if ((pos + 2) < line.size() && line[pos + 1] == ':' && line[pos + 2] == ':') {
1165 
1166       // add command \:: - handling this separately supports documentation
1167       // text like \::someText
1168       addDoxyCommand(m_tokenList, line.substr(pos + 1, 2));
1169       pos += 3;
1170       return true;
1171     }
1172   }
1173   return false;
1174 }
1175 
1176 /*
1177  * Processes word doxygen commands, like \arg, \c, \b, \return, ...
1178  */
processWordCommands(size_t & pos,const std::string & line)1179 void DoxygenParser::processWordCommands(size_t &pos, const std::string &line) {
1180   pos++;
1181   size_t endOfWordPos = getEndOfWordCommand(line, pos);
1182 
1183   string cmd = line.substr(pos, endOfWordPos - pos);
1184   addDoxyCommand(m_tokenList, cmd);
1185 
1186   // A flag for whether we want to skip leading spaces after the command
1187   bool skipLeadingSpace = true;
1188 
1189   if (cmd == CMD_HTML_ONLY || cmd == CMD_VERBATIM || cmd == CMD_LATEX_1 || cmd == CMD_LATEX_2 || cmd == CMD_LATEX_3 || getBaseCommand(cmd) == CMD_CODE) {
1190 
1191     m_isVerbatimText = true;
1192 
1193     // Skipping leading space is necessary with inline \code command,
1194     // and it won't hurt anything for block \code (TODO: are the other
1195     // commands also compatible with skip leading space?  If so, just
1196     // do it every time.)
1197     if (getBaseCommand(cmd) == CMD_CODE) skipLeadingSpace = true;
1198     else skipLeadingSpace = false;
1199   } else if (cmd.substr(0,3) == "end") {
1200     // If processing an "end" command such as "endlink", don't skip
1201     // the space before the next string
1202     skipLeadingSpace = false;
1203   }
1204 
1205   if (skipLeadingSpace) {
1206     // skip any possible spaces after command, because some commands have parameters,
1207     // and spaces between command and parameter must be ignored.
1208     if (endOfWordPos != string::npos) {
1209       endOfWordPos = line.find_first_not_of(" \t", endOfWordPos);
1210     }
1211   }
1212 
1213   pos = endOfWordPos;
1214 }
1215 
processHtmlTags(size_t & pos,const std::string & line)1216 void DoxygenParser::processHtmlTags(size_t &pos, const std::string &line) {
1217   bool isEndHtmlTag = false;
1218   pos++;
1219   if (line.size() > pos && line[pos] == '/') {
1220     isEndHtmlTag = true;
1221     pos++;
1222   }
1223 
1224   size_t endHtmlPos = line.find_first_of("\t >", pos);
1225 
1226   string cmd = line.substr(pos, endHtmlPos - pos);
1227   pos = endHtmlPos;
1228 
1229   // prepend '<' to distinguish HTML tags from doxygen commands
1230   if (!cmd.empty() && addDoxyCommand(m_tokenList, '<' + cmd)) {
1231     // it is a valid HTML command
1232     if (line[pos] != '>') {
1233       // it should be HTML tag with args,
1234       // for example <A ...>, <IMG ...>, ...
1235       if (isEndHtmlTag) {
1236         m_tokenListIt = m_tokenList.end();
1237         printListError(WARN_DOXYGEN_HTML_ERROR, "Doxygen HTML error for tag " + cmd + ": Illegal end HTML tag without '>' found.");
1238       }
1239 
1240       endHtmlPos = line.find(">", pos);
1241       if (endHtmlPos == string::npos) {
1242         m_tokenListIt = m_tokenList.end();
1243         printListError(WARN_DOXYGEN_HTML_ERROR, "Doxygen HTML error for tag " + cmd + ": HTML tag without '>' found.");
1244       }
1245       // add args of HTML command, like link URL, image URL, ...
1246       m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, endHtmlPos - pos)));
1247       pos = endHtmlPos;
1248     } else {
1249       if (isEndHtmlTag) {
1250         m_tokenList.push_back(Token(PLAINSTRING, END_HTML_TAG_MARK));
1251       } else {
1252         // it is a simple tag, so push empty string
1253         m_tokenList.push_back(Token(PLAINSTRING, ""));
1254       }
1255     }
1256 
1257     if (pos != string::npos) {
1258       pos++; // skip '>'
1259     }
1260   } else {
1261     // the command is not HTML supported by Doxygen, < and > will be
1262     // replaced by HTML entities &lt; and &gt; respectively,
1263     addDoxyCommand(m_tokenList, "&lt");
1264     m_tokenList.push_back(Token(PLAINSTRING, cmd));
1265   }
1266 }
1267 
processHtmlEntities(size_t & pos,const std::string & line)1268 void DoxygenParser::processHtmlEntities(size_t &pos, const std::string &line) {
1269   size_t endOfWordPos = line.find_first_not_of("abcdefghijklmnopqrstuvwxyz", pos + 1);
1270 
1271   if (endOfWordPos != string::npos) {
1272 
1273     if (line[endOfWordPos] == ';' && (endOfWordPos - pos) > 1) {
1274       // if entity is not recognized by Doxygen (not in the list of
1275       // commands) nothing is added (here and in Doxygen).
1276       addDoxyCommand(m_tokenList, line.substr(pos, endOfWordPos - pos));
1277       endOfWordPos++; // skip ';'
1278     } else {
1279       // it is not an entity - add entity for ampersand and the rest of string
1280       addDoxyCommand(m_tokenList, "&amp");
1281       m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos + 1, endOfWordPos - pos - 1)));
1282     }
1283   }
1284   pos = endOfWordPos;
1285 }
1286 
1287 /*
1288  * This method processes normal comment, which has to be tokenized.
1289  */
processNormalComment(size_t pos,const std::string & line)1290 size_t DoxygenParser::processNormalComment(size_t pos, const std::string &line) {
1291   switch (line[pos]) {
1292   case '\\':
1293   case '@':
1294     if (processEscapedChars(pos, line)) {
1295       break;
1296     }
1297     // handle word commands \arg, \c, \return, ... and \f[, \f$, ... commands
1298     processWordCommands(pos, line);
1299     break;
1300 
1301   case ' ': // whitespace
1302   case '\t':
1303     {
1304       // whitespaces are stored as plain strings
1305       size_t startOfNextWordPos = line.find_first_not_of(" \t", pos + 1);
1306       m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, startOfNextWordPos - pos)));
1307       pos = startOfNextWordPos;
1308     }
1309     break;
1310 
1311   case '<':
1312     processHtmlTags(pos, line);
1313     break;
1314   case '>': // this char is detected here only when it is not part of HTML tag
1315     addDoxyCommand(m_tokenList, "&gt");
1316     pos++;
1317     break;
1318   case '&':
1319     processHtmlEntities(pos, line);
1320     break;
1321   case '"':
1322     m_isInQuotedString = true;
1323     m_tokenList.push_back(Token(PLAINSTRING, "\""));
1324     pos++;
1325     break;
1326   default:
1327     m_tokenListIt = m_tokenList.end();
1328     printListError(WARN_DOXYGEN_UNKNOWN_CHARACTER, std::string("Unknown special character in Doxygen comment: ") + line[pos] + ".");
1329   }
1330 
1331   return pos;
1332 }
1333 
1334 /*
1335  * This is the main method, which tokenizes Doxygen comment to words and
1336  * doxygen commands.
1337  */
tokenizeDoxygenComment(const std::string & doxygenComment,const std::string & fileName,int fileLine)1338 void DoxygenParser::tokenizeDoxygenComment(const std::string &doxygenComment, const std::string &fileName, int fileLine) {
1339   m_isVerbatimText = false;
1340   m_isInQuotedString = false;
1341   m_tokenList.clear();
1342   m_fileLineNo = fileLine;
1343   m_fileName = fileName;
1344 
1345   StringVector lines = split(doxygenComment, '\n');
1346 
1347   // remove trailing spaces, because they cause additional new line at the end
1348   // comment, which is wrong, because these spaces are space preceding
1349   // end of comment :  '  */'
1350   if (!doxygenComment.empty() && doxygenComment[doxygenComment.size() - 1] == ' ') {
1351 
1352     string lastLine = lines[lines.size() - 1];
1353 
1354     if (trim(lastLine).empty()) {
1355       lines.pop_back(); // remove trailing empty line
1356     }
1357   }
1358 
1359   for (StringVectorCIt it = lines.begin(); it != lines.end(); it++) {
1360     const string &line = *it;
1361     size_t pos = line.find_first_not_of(" \t");
1362 
1363     if (pos == string::npos) {
1364       m_tokenList.push_back(Token(END_LINE, "\n"));
1365       continue;
1366     }
1367     // skip sequences of '*', '/', and '!' of any length
1368     bool isStartOfCommentLineCharFound = false;
1369     while (pos < line.size() && isStartOfDoxyCommentChar(line[pos])) {
1370       pos++;
1371       isStartOfCommentLineCharFound = true;
1372     }
1373 
1374     if (pos == line.size()) {
1375       m_tokenList.push_back(Token(END_LINE, "\n"));
1376       continue;
1377     }
1378     // if 'isStartOfCommentLineCharFound' then preserve leading spaces, so
1379     // ' *    comment' gets translated to ' *    comment', not ' * comment'
1380     // This is important to keep formatting for comments translated to Python.
1381     if (isStartOfCommentLineCharFound && line[pos] == ' ') {
1382       pos++; // points to char after ' * '
1383       if (pos == line.size()) {
1384         m_tokenList.push_back(Token(END_LINE, "\n"));
1385         continue;
1386       }
1387     }
1388     // line[pos] may be ' \t' or start of word, it there was no '*', '/' or '!'
1389     // at beginning of the line. Make sure it points to start of the first word
1390     // in the line.
1391     if (isStartOfCommentLineCharFound) {
1392       size_t firstWordPos = line.find_first_not_of(" \t", pos);
1393       if (firstWordPos == string::npos) {
1394         m_tokenList.push_back(Token(END_LINE, "\n"));
1395         continue;
1396       }
1397 
1398       if (firstWordPos > pos) {
1399         m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, firstWordPos - pos)));
1400         pos = firstWordPos;
1401       }
1402     } else {
1403       m_tokenList.push_back(Token(PLAINSTRING, line.substr(0, pos)));
1404     }
1405 
1406     while (pos != string::npos) {
1407       // find the end of the word
1408       size_t doxyCmdOrHtmlTagPos = line.find_first_of("\\@<>&\" \t", pos);
1409       if (doxyCmdOrHtmlTagPos != pos) {
1410         // plain text found
1411         // if the last char is punctuation, make it a separate word, otherwise
1412         // it may be included with word also when not appropriate, for example:
1413         //   colors are \b red, green, and blue --> colors are <b>red,</b> green, and blue
1414         // instead of (comma not bold):
1415         //   colors are \b red, green, and blue --> colors are <b>red</b>, green, and blue
1416         // In Python it looks even worse:
1417         //   colors are \b red, green, and blue --> colors are 'red,' green, and blue
1418         string text = line.substr(pos, doxyCmdOrHtmlTagPos - pos);
1419         string punctuations(".,:");
1420         size_t textSize = text.size();
1421 
1422         if (!text.empty()
1423             && punctuations.find(text[text.size() - 1]) != string::npos &&
1424             // but do not break ellipsis (...)
1425             !(textSize > 1 && text[textSize - 2] == '.')) {
1426           m_tokenList.push_back(Token(PLAINSTRING, text.substr(0, text.size() - 1)));
1427           m_tokenList.push_back(Token(PLAINSTRING, text.substr(text.size() - 1)));
1428         } else {
1429           m_tokenList.push_back(Token(PLAINSTRING, text));
1430         }
1431       }
1432 
1433       pos = doxyCmdOrHtmlTagPos;
1434       if (pos != string::npos) {
1435         if (m_isVerbatimText) {
1436           pos = processVerbatimText(pos, line);
1437 
1438         } else if (m_isInQuotedString) {
1439 
1440           if (line[pos] == '"') {
1441             m_isInQuotedString = false;
1442           }
1443           m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, 1)));
1444           pos++;
1445 
1446         } else {
1447           pos = processNormalComment(pos, line);
1448         }
1449       }
1450     }
1451     m_tokenList.push_back(Token(END_LINE, "\n")); // add when pos == npos - end of line
1452   }
1453 
1454   m_tokenListIt = m_tokenList.begin();
1455 }
1456 
printList()1457 void DoxygenParser::printList() {
1458 
1459   int tokNo = 0;
1460   for (TokenListCIt it = m_tokenList.begin(); it != m_tokenList.end(); it++, tokNo++) {
1461 
1462     cout << it->toString() << " ";
1463 
1464     if ((tokNo % TOKENSPERLINE) == 0) {
1465       cout << endl;
1466     }
1467   }
1468 }
1469 
printListError(int warningType,const std::string & message)1470 void DoxygenParser::printListError(int warningType, const std::string &message) {
1471   int curLine = m_fileLineNo;
1472   for (TokenListCIt it = m_tokenList.begin(); it != m_tokenListIt; it++) {
1473     if (it->m_tokenType == END_LINE) {
1474       curLine++;
1475     }
1476   }
1477 
1478   Swig_warning(warningType, m_fileName.c_str(), curLine, "%s\n", message.c_str());
1479 }
1480