1 /* -----------------------------------------------------------------------------
2 * This file is part of SWIG, which is licensed as a whole under version 3
3 * (or any later version) of the GNU General Public License. Some additional
4 * terms also apply to certain portions of SWIG. The full details of the SWIG
5 * license and copyrights can be found in the LICENSE and COPYRIGHT files
6 * included with the SWIG source code as distributed by the SWIG developers
7 * and at http://www.swig.org/legal.html.
8 *
9 * doxyparser.cxx
10 * ----------------------------------------------------------------------------- */
11
12 #include "doxyparser.h"
13 #include "doxycommands.h"
14 #include "swig.h"
15 #include "swigwarn.h"
16
17 #include <iostream>
18 #include <algorithm>
19 #include <vector>
20
21 using std::string;
22 using std::cout;
23 using std::endl;
24
25 // This constant defines the (only) characters valid inside a Doxygen "word".
26 // It includes some unusual ones because of the commands such as \f[, \f{, \f],
27 // \f} and \f$.
28 static const char *DOXYGEN_WORD_CHARS = "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789" "$[]{}";
29
30 // Define static class members
31 DoxygenParser::DoxyCommandsMap DoxygenParser::doxygenCommands;
32 std::set<std::string> DoxygenParser::doxygenSectionIndicators;
33
34 const int TOKENSPERLINE = 8; //change this to change the printing behaviour of the token list
35 const std::string END_HTML_TAG_MARK("/");
36
// Reduce a command carrying an inline argument to its plain name: any command
// beginning with "param" (e.g. "param[in,out]") yields "param" and any command
// beginning with "code" (e.g. "code{.py}") yields "code". All other commands
// are returned unchanged.
std::string getBaseCommand(const std::string &cmd) {
  if (cmd.compare(0, 5, "param") == 0)
    return "param";

  if (cmd.compare(0, 4, "code") == 0)
    return "code";

  return cmd;
}
45
46 // Find the first position beyond the word command. Extra logic is
47 // used to avoid putting the characters "," and "." in
48 // DOXYGEN_WORD_CHARS.
getEndOfWordCommand(const std::string & line,size_t pos)49 static size_t getEndOfWordCommand(const std::string &line, size_t pos) {
50 size_t endOfWordPos = line.find_first_not_of(DOXYGEN_WORD_CHARS, pos);
51 if (line.substr(pos, 6) == "param[")
52 // include ",", which can appear in param[in,out]
53 endOfWordPos = line.find_first_not_of(string(DOXYGEN_WORD_CHARS)+ ",", pos);
54 else if (line.substr(pos, 5) == "code{")
55 // include ".", which can appear in e.g. code{.py}
56 endOfWordPos = line.find_first_not_of(string(DOXYGEN_WORD_CHARS)+ ".", pos);
57 return endOfWordPos;
58 }
59
60
DoxygenParser(bool noisy)61 DoxygenParser::DoxygenParser(bool noisy) : noisy(noisy) {
62 fillTables();
63 }
64
~DoxygenParser()65 DoxygenParser::~DoxygenParser() {
66 }
67
fillTables()68 void DoxygenParser::fillTables() {
69 // run it only once
70 if (doxygenCommands.size())
71 return;
72
73 // fill in tables with data from doxycommands.h
74 for (int i = 0; i < simpleCommandsSize; i++)
75 doxygenCommands[simpleCommands[i]] = SIMPLECOMMAND;
76
77 for (int i = 0; i < commandWordsSize; i++)
78 doxygenCommands[commandWords[i]] = COMMANDWORD;
79
80 for (int i = 0; i < commandLinesSize; i++)
81 doxygenCommands[commandLines[i]] = COMMANDLINE;
82
83 for (int i = 0; i < commandParagraphSize; i++)
84 doxygenCommands[commandParagraph[i]] = COMMANDPARAGRAPH;
85
86 for (int i = 0; i < commandEndCommandsSize; i++)
87 doxygenCommands[commandEndCommands[i]] = COMMANDENDCOMMAND;
88
89 for (int i = 0; i < commandWordParagraphsSize; i++)
90 doxygenCommands[commandWordParagraphs[i]] = COMMANDWORDPARAGRAPH;
91
92 for (int i = 0; i < commandWordLinesSize; i++)
93 doxygenCommands[commandWordLines[i]] = COMMANDWORDLINE;
94
95 for (int i = 0; i < commandWordOWordOWordsSize; i++)
96 doxygenCommands[commandWordOWordOWords[i]] = COMMANDWORDOWORDWORD;
97
98 for (int i = 0; i < commandOWordsSize; i++)
99 doxygenCommands[commandOWords[i]] = COMMANDOWORD;
100
101 for (int i = 0; i < commandErrorThrowingsSize; i++)
102 doxygenCommands[commandErrorThrowings[i]] = COMMANDERRORTHROW;
103
104 for (int i = 0; i < commandUniquesSize; i++)
105 doxygenCommands[commandUniques[i]] = COMMANDUNIQUE;
106
107 for (int i = 0; i < commandHtmlSize; i++)
108 doxygenCommands[commandHtml[i]] = COMMAND_HTML;
109
110 for (int i = 0; i < commandHtmlEntitiesSize; i++)
111 doxygenCommands[commandHtmlEntities[i]] = COMMAND_HTML_ENTITY;
112
113 // fill section indicators command set
114 for (int i = 0; i < sectionIndicatorsSize; i++)
115 doxygenSectionIndicators.insert(sectionIndicators[i]);
116 }
117
stringToLower(const std::string & stringToConvert)118 std::string DoxygenParser::stringToLower(const std::string &stringToConvert) {
119
120 string result(stringToConvert.size(), ' ');
121
122 for (size_t i = 0; i < result.size(); i++) {
123 result[i] = tolower(stringToConvert[i]);
124 }
125
126 return result;
127 }
128
isSectionIndicator(const std::string & smallString)129 bool DoxygenParser::isSectionIndicator(const std::string &smallString) {
130
131 std::set<std::string>::iterator it = doxygenSectionIndicators.find(stringToLower(smallString));
132
133 return it != doxygenSectionIndicators.end();
134 }
135
printTree(const DoxygenEntityList & rootList)136 void DoxygenParser::printTree(const DoxygenEntityList &rootList) {
137 DoxygenEntityList::const_iterator p = rootList.begin();
138 while (p != rootList.end()) {
139 (*p).printEntity(0);
140 p++;
141 }
142 }
143
commandBelongs(const std::string & theCommand)144 DoxygenParser::DoxyCommandEnum DoxygenParser::commandBelongs(const std::string &theCommand) {
145 DoxyCommandsMapIt it = doxygenCommands.find(stringToLower(getBaseCommand(theCommand)));
146
147 if (it != doxygenCommands.end()) {
148 return it->second;
149 }
150 // Check if this command is defined as an alias.
151 if (Getattr(m_node, ("feature:doxygen:alias:" + theCommand).c_str())) {
152 return COMMAND_ALIAS;
153 }
154 // Check if this command should be ignored.
155 if (String *const ignore = getIgnoreFeature(theCommand)) {
156 // Check that no value is specified for this feature ("1" is the implicit
157 // one given to it by SWIG itself), we may use the value in the future, but
158 // for now we only use the attributes.
159 if (Strcmp(ignore, "1") != 0) {
160 Swig_warning(WARN_PP_UNEXPECTED_TOKENS, m_fileName.c_str(), m_fileLineNo,
161 "Feature \"doxygen:ignore\" value ignored for Doxygen command \"%s\".\n", theCommand.c_str());
162 }
163 // Also ensure that the matching end command, if any, will be recognized.
164 const string endCommand = getIgnoreFeatureEndCommand(theCommand);
165 if (!endCommand.empty()) {
166 Setattr(m_node, ("feature:doxygen:ignore:" + endCommand).c_str(), NewString("1"));
167 }
168
169 return COMMAND_IGNORE;
170 }
171
172 return NONE;
173 }
174
trim(const std::string & text)175 std::string DoxygenParser::trim(const std::string &text) {
176 size_t start = text.find_first_not_of(" \t");
177 size_t end = text.find_last_not_of(" \t");
178
179 if (start == string::npos || start > end) {
180 return "";
181 }
182 return text.substr(start, end - start + 1);
183 }
184
isEndOfLine()185 bool DoxygenParser::isEndOfLine() {
186 if (m_tokenListIt == m_tokenList.end()) {
187 return false;
188 }
189 Token nextToken = *m_tokenListIt;
190 return nextToken.m_tokenType == END_LINE;
191 }
192
skipWhitespaceTokens()193 void DoxygenParser::skipWhitespaceTokens() {
194 if (m_tokenListIt == m_tokenList.end()) {
195 return;
196 }
197
198 while (m_tokenListIt != m_tokenList.end()
199 && (m_tokenListIt->m_tokenType == END_LINE || trim(m_tokenListIt->m_tokenString).empty())) {
200
201 m_tokenListIt++;
202 }
203 }
204
getNextToken()205 std::string DoxygenParser::getNextToken() {
206
207 if (m_tokenListIt == m_tokenList.end()) {
208 return "";
209 }
210
211 if (m_tokenListIt->m_tokenType == PLAINSTRING) {
212 return (m_tokenListIt++)->m_tokenString;
213 }
214
215 return "";
216 }
217
getNextWord()218 std::string DoxygenParser::getNextWord() {
219
220 /* if (m_tokenListIt == m_tokenList.end()) {
221 return "";
222 }
223 */
224 while (m_tokenListIt != m_tokenList.end()
225 && (m_tokenListIt->m_tokenType == PLAINSTRING)) {
226 // handle quoted strings as words
227 string token = m_tokenListIt->m_tokenString;
228 if (token == "\"") {
229
230 string word = m_tokenListIt->m_tokenString;
231 m_tokenListIt++;
232 while (true) {
233 string nextWord = getNextToken();
234 if (nextWord.empty()) { // maybe report unterminated string error
235 return word;
236 }
237 word += nextWord;
238 if (nextWord == "\"") {
239 return word;
240 }
241 }
242 }
243
244 string tokenStr = trim(m_tokenListIt->m_tokenString);
245 m_tokenListIt++;
246 if (!tokenStr.empty()) {
247 return tokenStr;
248 }
249 }
250
251 return "";
252 }
253
getOneLine(const TokenList & tokList)254 DoxygenParser::TokenListCIt DoxygenParser::getOneLine(const TokenList &tokList) {
255
256 TokenListCIt endOfLineIt = m_tokenListIt;
257
258 while (endOfLineIt != tokList.end()) {
259 if (endOfLineIt->m_tokenType == END_LINE) {
260 return endOfLineIt;
261 }
262 endOfLineIt++;
263 }
264
265 return tokList.end();
266 }
267
getStringTilCommand(const TokenList & tokList)268 std::string DoxygenParser::getStringTilCommand(const TokenList &tokList) {
269
270 if (m_tokenListIt == tokList.end()) {
271 return "";
272 }
273
274 string description;
275
276 while (m_tokenListIt->m_tokenType == PLAINSTRING) {
277 const Token ¤tToken = *m_tokenListIt++;
278 if (currentToken.m_tokenType == PLAINSTRING) {
279 description = description + currentToken.m_tokenString; // + " ";
280 }
281 }
282 return description;
283 }
284
getStringTilEndCommand(const std::string & theCommand,const TokenList & tokList)285 std::string DoxygenParser::getStringTilEndCommand(const std::string &theCommand, const TokenList &tokList) {
286
287 if (m_tokenListIt == tokList.end()) {
288 return "";
289 }
290
291 string description;
292 while (m_tokenListIt != tokList.end()) {
293
294 if (m_tokenListIt->m_tokenType == PLAINSTRING) {
295 description += m_tokenListIt->m_tokenString;
296 } else if (m_tokenListIt->m_tokenType == END_LINE) {
297 description += "\n";
298 } else if (m_tokenListIt->m_tokenString == theCommand) {
299 m_tokenListIt++;
300 return description;
301 }
302
303 m_tokenListIt++;
304 }
305
306 printListError(WARN_DOXYGEN_COMMAND_EXPECTED, "Expected Doxygen command: " + theCommand + ".");
307
308 return description;
309 }
310
getEndOfParagraph(const TokenList & tokList)311 DoxygenParser::TokenListCIt DoxygenParser::getEndOfParagraph(const TokenList &tokList) {
312
313 TokenListCIt endOfParagraph = m_tokenListIt;
314
315 while (endOfParagraph != tokList.end()) {
316 // If \code or \verbatim is encountered within a paragraph, then
317 // go all the way to the end of that command, since the content
318 // could contain empty lines that would appear to be paragraph
319 // ends:
320 if (endOfParagraph->m_tokenType == COMMAND &&
321 (endOfParagraph->m_tokenString == "code" ||
322 endOfParagraph->m_tokenString == "verbatim")) {
323 const string theCommand = endOfParagraph->m_tokenString;
324 endOfParagraph = getEndCommand("end" + theCommand, tokList);
325 endOfParagraph++; // Move after the end command
326 return endOfParagraph;
327 }
328 if (endOfParagraph->m_tokenType == END_LINE) {
329 endOfParagraph++;
330 if (endOfParagraph != tokList.end()
331 && endOfParagraph->m_tokenType == END_LINE) {
332 endOfParagraph++;
333 //cout << "ENCOUNTERED END OF PARA" << endl;
334 return endOfParagraph;
335 }
336
337 } else if (endOfParagraph->m_tokenType == COMMAND) {
338
339 if (isSectionIndicator(getBaseCommand(endOfParagraph->m_tokenString))) {
340 return endOfParagraph;
341 } else {
342 endOfParagraph++;
343 }
344
345 } else if (endOfParagraph->m_tokenType == PLAINSTRING) {
346 endOfParagraph++;
347 } else {
348 return tokList.end();
349 }
350 }
351
352 return tokList.end();
353 }
354
getEndOfSection(const std::string & theCommand,const TokenList & tokList)355 DoxygenParser::TokenListCIt DoxygenParser::getEndOfSection(const std::string &theCommand, const TokenList &tokList) {
356
357 TokenListCIt endOfParagraph = m_tokenListIt;
358
359 while (endOfParagraph != tokList.end()) {
360 if (endOfParagraph->m_tokenType == COMMAND) {
361 if (theCommand == endOfParagraph->m_tokenString)
362 return endOfParagraph;
363 else
364 endOfParagraph++;
365 } else if (endOfParagraph->m_tokenType == PLAINSTRING) {
366 endOfParagraph++;
367 } else if (endOfParagraph->m_tokenType == END_LINE) {
368 endOfParagraph++;
369 if (endOfParagraph->m_tokenType == END_LINE) {
370 endOfParagraph++;
371 return endOfParagraph;
372 }
373 }
374 }
375 return tokList.end();
376 }
377
getEndCommand(const std::string & theCommand,const TokenList & tokList)378 DoxygenParser::TokenListCIt DoxygenParser::getEndCommand(const std::string &theCommand, const TokenList &tokList) {
379
380 TokenListCIt endOfCommand = m_tokenListIt;
381
382 while (endOfCommand != tokList.end()) {
383 endOfCommand++;
384 if ((*endOfCommand).m_tokenType == COMMAND) {
385 if (theCommand == (*endOfCommand).m_tokenString) {
386 return endOfCommand;
387 }
388 }
389 }
390 //End command not found
391 return tokList.end();
392 }
393
skipEndOfLine()394 void DoxygenParser::skipEndOfLine() {
395 if (m_tokenListIt != m_tokenList.end()
396 && m_tokenListIt->m_tokenType == END_LINE) {
397 m_tokenListIt++;
398 }
399 }
400
addSimpleCommand(const std::string & theCommand,DoxygenEntityList & doxyList)401 void DoxygenParser::addSimpleCommand(const std::string &theCommand, DoxygenEntityList &doxyList) {
402 if (noisy)
403 cout << "Parsing " << theCommand << endl;
404
405 doxyList.push_back(DoxygenEntity(theCommand));
406 }
407
addCommandWord(const std::string & theCommand,const TokenList &,DoxygenEntityList & doxyList)408 void DoxygenParser::addCommandWord(const std::string &theCommand, const TokenList &, DoxygenEntityList &doxyList) {
409 if (noisy)
410 cout << "Parsing " << theCommand << endl;
411
412 if (isEndOfLine()) {
413 // handles cases when command is at the end of line (for example "\c\nreally"
414 skipWhitespaceTokens();
415 doxyList.push_back(DoxygenEntity("plainstd::endl"));
416 }
417 std::string name = getNextWord();
418 if (!name.empty()) {
419 DoxygenEntityList aNewList;
420 aNewList.push_back(DoxygenEntity("plainstd::string", name));
421 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
422 } else {
423 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No word followed the command. Command ignored.");
424 }
425 }
426
addCommandLine(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList & doxyList)427 void DoxygenParser::addCommandLine(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList) {
428 if (noisy)
429 cout << "Parsing " << theCommand << endl;
430 TokenListCIt endOfLine = getOneLine(tokList);
431 DoxygenEntityList aNewList = parse(endOfLine, tokList);
432 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
433 skipEndOfLine();
434 }
435
addCommandParagraph(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList & doxyList)436 void DoxygenParser::addCommandParagraph(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList) {
437 if (noisy)
438 cout << "Parsing " << theCommand << endl;
439
440 TokenListCIt endOfParagraph = getEndOfParagraph(tokList);
441 DoxygenEntityList aNewList;
442 aNewList = parse(endOfParagraph, tokList);
443 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
444 }
445
addCommandEndCommand(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList & doxyList)446 void DoxygenParser::addCommandEndCommand(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList) {
447 if (noisy)
448 cout << "Parsing " << theCommand << endl;
449 TokenListCIt endCommand = getEndCommand("end" + theCommand, tokList);
450 if (endCommand == tokList.end()) {
451 printListError(WARN_DOXYGEN_COMMAND_EXPECTED, "Expected Doxygen command: end" + theCommand + ".");
452 return;
453 }
454 DoxygenEntityList aNewList;
455 aNewList = parse(endCommand, tokList);
456 m_tokenListIt++;
457 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
458 }
459
addCommandWordParagraph(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList & doxyList)460 void DoxygenParser::addCommandWordParagraph(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList) {
461 if (noisy)
462 cout << "Parsing " << theCommand << endl;
463
464 std::string name = getNextWord();
465
466 if (name.empty()) {
467 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No word followed the command. Command ignored.");
468 return;
469 }
470 TokenListCIt endOfParagraph = getEndOfParagraph(tokList);
471 DoxygenEntityList aNewList;
472 aNewList = parse(endOfParagraph, tokList);
473 aNewList.push_front(DoxygenEntity("plainstd::string", name));
474 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
475 }
476
addCommandWordLine(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList & doxyList)477 void DoxygenParser::addCommandWordLine(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList) {
478 if (noisy)
479 cout << "Parsing " << theCommand << endl;
480 std::string name = getNextWord();
481 if (name.empty()) {
482 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No word followed the command. Command ignored.");
483 return;
484 }
485
486 TokenListCIt endOfLine = getOneLine(tokList);
487 DoxygenEntityList aNewList;
488 aNewList = parse(endOfLine, tokList);
489 aNewList.push_front(DoxygenEntity("plainstd::string", name));
490 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
491 //else cout << "No line followed " << theCommand << " command. Not added" << endl;
492 }
493
addCommandWordOWordOWord(const std::string & theCommand,const TokenList &,DoxygenEntityList & doxyList)494 void DoxygenParser::addCommandWordOWordOWord(const std::string &theCommand, const TokenList &, DoxygenEntityList &doxyList) {
495 if (noisy)
496 cout << "Parsing " << theCommand << endl;
497
498 std::string name = getNextWord();
499 if (name.empty()) {
500 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No word followed the command. Command ignored.");
501 return;
502 }
503 std::string headerfile = getNextWord();
504 std::string headername = getNextWord();
505 DoxygenEntityList aNewList;
506 aNewList.push_back(DoxygenEntity("plainstd::string", name));
507 if (!headerfile.empty())
508 aNewList.push_back(DoxygenEntity("plainstd::string", headerfile));
509 if (!headername.empty())
510 aNewList.push_back(DoxygenEntity("plainstd::string", headername));
511 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
512 }
513
addCommandOWord(const std::string & theCommand,const TokenList &,DoxygenEntityList & doxyList)514 void DoxygenParser::addCommandOWord(const std::string &theCommand, const TokenList &, DoxygenEntityList &doxyList) {
515 if (noisy)
516 cout << "Parsing " << theCommand << endl;
517
518 std::string name = getNextWord();
519 DoxygenEntityList aNewList;
520 aNewList.push_back(DoxygenEntity("plainstd::string", name));
521 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
522 }
523
addCommandErrorThrow(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList &)524 void DoxygenParser::addCommandErrorThrow(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &) {
525
526 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": Unexpectedly encountered this command.");
527 m_tokenListIt = getOneLine(tokList);
528 }
529
addCommandHtml(const std::string & theCommand,const TokenList &,DoxygenEntityList & doxyList)530 void DoxygenParser::addCommandHtml(const std::string &theCommand, const TokenList &, DoxygenEntityList &doxyList) {
531 if (noisy)
532 cout << "Parsing " << theCommand << endl;
533
534 std::string htmlTagArgs = getNextToken();
535 doxyList.push_back(DoxygenEntity(theCommand, htmlTagArgs));
536 }
537
addCommandHtmlEntity(const std::string & theCommand,const TokenList &,DoxygenEntityList & doxyList)538 void DoxygenParser::addCommandHtmlEntity(const std::string &theCommand, const TokenList &, DoxygenEntityList &doxyList) {
539 if (noisy)
540 cout << "Parsing " << theCommand << endl;
541
542 DoxygenEntityList aNewList;
543 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
544 }
545
addCommandUnique(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList & doxyList)546 void DoxygenParser::addCommandUnique(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList) {
547
548 static std::map<std::string, std::string> endCommands;
549 DoxygenEntityList aNewList;
550 if (theCommand == "arg" || theCommand == "li") {
551 TokenListCIt endOfSection = getEndOfSection(theCommand, tokList);
552 DoxygenEntityList aNewList;
553 aNewList = parse(endOfSection, tokList);
554 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
555 }
556 // \xrefitem <key> "(heading)" "(std::list title)" {text}
557 else if (theCommand == "xrefitem") {
558 if (noisy)
559 cout << "Parsing " << theCommand << endl;
560 std::string key = getNextWord();
561 if (key.empty()) {
562 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No key followed the command. Command ignored.");
563 return;
564 }
565 std::string heading = getNextWord();
566 if (key.empty()) {
567 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No heading followed the command. Command ignored.");
568 return;
569 }
570 std::string title = getNextWord();
571 if (title.empty()) {
572 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No title followed the command. Command ignored.");
573 return;
574 }
575 TokenListCIt endOfParagraph = getEndOfParagraph(tokList);
576 aNewList = parse(endOfParagraph, tokList);
577 aNewList.push_front(DoxygenEntity("plainstd::string", title));
578 aNewList.push_front(DoxygenEntity("plainstd::string", heading));
579 aNewList.push_front(DoxygenEntity("plainstd::string", key));
580 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
581 }
582 // \ingroup (<groupname> [<groupname> <groupname>])
583 else if (theCommand == "ingroup") {
584 std::string name = getNextWord();
585 aNewList.push_back(DoxygenEntity("plainstd::string", name));
586 name = getNextWord();
587 if (!name.empty())
588 aNewList.push_back(DoxygenEntity("plainstd::string", name));
589 name = getNextWord();
590 if (!name.empty())
591 aNewList.push_back(DoxygenEntity("plainstd::string", name));
592 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
593 }
594 // \par [(paragraph title)] { paragraph }
595 else if (theCommand == "par") {
596 TokenListCIt endOfLine = getOneLine(tokList);
597 aNewList = parse(endOfLine, tokList);
598 DoxygenEntityList aNewList2;
599 TokenListCIt endOfParagraph = getEndOfParagraph(tokList);
600 aNewList2 = parse(endOfParagraph, tokList);
601 aNewList.splice(aNewList.end(), aNewList2);
602 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
603 }
604 // \headerfile <header-file> [<header-name>]
605 else if (theCommand == "headerfile") {
606 DoxygenEntityList aNewList;
607 std::string name = getNextWord();
608 aNewList.push_back(DoxygenEntity("plainstd::string", name));
609 name = getNextWord();
610 if (!name.empty())
611 aNewList.push_back(DoxygenEntity("plainstd::string", name));
612 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
613 }
614 // \overload [(function declaration)]
615 else if (theCommand == "overload") {
616 TokenListCIt endOfLine = getOneLine(tokList);
617 if (endOfLine != m_tokenListIt) {
618 DoxygenEntityList aNewList;
619 aNewList = parse(endOfLine, tokList);
620 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
621 } else
622 doxyList.push_back(DoxygenEntity(theCommand));
623 }
624 // \weakgroup <name> [(title)]
625 else if (theCommand == "weakgroup") {
626 if (noisy)
627 cout << "Parsing " << theCommand << endl;
628 std::string name = getNextWord();
629 if (name.empty()) {
630 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No word followed the command. Command ignored.");
631 return;
632 }
633 DoxygenEntityList aNewList;
634 TokenListCIt endOfLine = getOneLine(tokList);
635 if (endOfLine != m_tokenListIt) {
636 aNewList = parse(endOfLine, tokList);
637 }
638 aNewList.push_front(DoxygenEntity("plainstd::string", name));
639 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
640 }
641 // \ref <name> ["(text)"]
642 else if (theCommand == "ref") {
643 if (noisy)
644 cout << "Parsing " << theCommand << endl;
645 std::string name = getNextWord();
646 if (name.empty()) {
647 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No key followed the command. Command ignored.");
648 return;
649 }
650 DoxygenEntityList aNewList;
651 aNewList.push_front(DoxygenEntity("plainstd::string", name));
652 // TokenListCIt endOfLine = getOneLine(tokList);
653 // if (endOfLine != m_tokenListIt) {
654 // aNewList = parse(endOfLine, tokList);
655 //}
656 TokenListCIt tmpIt = m_tokenListIt;
657 std::string refTitle = getNextWord();
658 // If title is following the ref tag, it must be quoted. Otherwise
659 // doxy puts link on ref id.
660 if (refTitle.size() > 1 && refTitle[0] == '"') {
661 // remove quotes
662 refTitle = refTitle.substr(1, refTitle.size() - 2);
663 aNewList.push_back(DoxygenEntity("plainstd::string", refTitle));
664 } else {
665 // no quoted string is following, so we have to restore iterator
666 m_tokenListIt = tmpIt;
667 }
668 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
669 }
670 // \subpage <name> ["(text)"]
671 else if (theCommand == "subpage") {
672 if (noisy)
673 cout << "Parsing " << theCommand << endl;
674 std::string name = getNextWord();
675 if (name.empty()) {
676 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No name followed the command. Command ignored.");
677 return;
678 }
679 std::string text = getNextWord();
680 aNewList.push_back(DoxygenEntity("plainstd::string", name));
681 if (!text.empty())
682 aNewList.push_back(DoxygenEntity("plainstd::string", text));
683 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
684 }
685 // \code ... \endcode
686 // \verbatim ... \endverbatim
687 // \dot dotcode \enddot
688 // \msc msccode \endmsc
689 // \f[ ... \f]
690 // \f{ ... \f}
691 // \f{env}{ ... \f}
692 // \f$ ... \f$
693 else if (getBaseCommand(theCommand) == "code" || theCommand == "verbatim"
694 || theCommand == "dot" || theCommand == "msc" || theCommand == "f[" || theCommand == "f{" || theCommand == "f$") {
695 if (!endCommands.size()) {
696 // fill in static table of end commands
697 endCommands["f["] = "f]";
698 endCommands["f{"] = "f}";
699 endCommands["f$"] = "f$";
700 }
701 if (noisy)
702 cout << "Parsing " << theCommand << endl;
703
704 std::string endCommand;
705 std::map<std::string, std::string>::iterator it;
706 it = endCommands.find(theCommand);
707 if (it != endCommands.end())
708 endCommand = it->second;
709 else
710 endCommand = "end" + getBaseCommand(theCommand);
711
712 std::string content = getStringTilEndCommand(endCommand, tokList);
713 aNewList.push_back(DoxygenEntity("plainstd::string", content));
714 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
715 }
716 // \dotfile <file> ["caption"]
717 // \mscfile <file> ["caption"]
718 else if (theCommand == "dotfile" || theCommand == "mscfile") {
719 if (noisy)
720 cout << "Parsing " << theCommand << endl;
721 std::string file = getNextWord();
722 if (file.empty()) {
723 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No file followed the command. Command ignored.");
724 return;
725 }
726 std::string caption = getNextWord();
727 aNewList.push_back(DoxygenEntity("plainstd::string", file));
728 if (!caption.empty())
729 aNewList.push_back(DoxygenEntity("plainstd::string", caption));
730 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
731 }
732 // \image <format> <file> ["caption"] [<sizeindication>=<size>]
733 else if (theCommand == "image") {
734 if (noisy)
735 cout << "Parsing " << theCommand << endl;
736 std::string format = getNextWord();
737 if (format.empty()) {
738 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No format followed the command. Command ignored.");
739 return;
740 }
741 std::string file = getNextWord();
742 if (file.empty()) {
743 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No name followed the command. Command ignored.");
744 return;
745 }
746 std::string caption = getNextWord();
747 std::string size = getNextWord();
748
749 DoxygenEntityList aNewList;
750 aNewList.push_back(DoxygenEntity("plainstd::string", format));
751 aNewList.push_back(DoxygenEntity("plainstd::string", file));
752 if (!caption.empty())
753 aNewList.push_back(DoxygenEntity("plainstd::string", caption));
754 if (!size.empty())
755 aNewList.push_back(DoxygenEntity("plainstd::string", size));
756 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
757 }
758 // \addtogroup <name> [(title)]
759 else if (theCommand == "addtogroup") {
760 if (noisy)
761 cout << "Parsing " << theCommand << endl;
762 std::string name = getNextWord();
763 if (name.empty()) {
764 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": There should be at least one word following the command. Command ignored.");
765 return;
766 }
767 DoxygenEntityList aNewList;
768 TokenListCIt endOfLine = getOneLine(tokList);
769 if (endOfLine != m_tokenListIt) {
770 aNewList = parse(endOfLine, tokList);
771 }
772 aNewList.push_front(DoxygenEntity("plainstd::string", name));
773 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
774 skipEndOfLine();
775 }
776 // \if <cond> [\else ...] [\elseif <cond> ...] \endif
777 else if (theCommand == "if" || theCommand == "ifnot" || theCommand == "else" || theCommand == "elseif") {
778 if (noisy)
779 cout << "Parsing " << theCommand << endl;
780
781 std::string cond;
782 bool skipEndif = false; // if true then we skip endif after parsing block of code
783 bool needsCond = (theCommand == "if" || theCommand == "ifnot" || theCommand == "elseif");
784 if (needsCond) {
785 cond = getNextWord();
786 if (cond.empty()) {
787 printListError(WARN_DOXYGEN_COMMAND_ERROR, "Error parsing Doxygen command " + theCommand + ": No word followed the command. Command ignored.");
788 return;
789 }
790 }
791
792 int nestedCounter = 1;
793 TokenListCIt endCommand = tokList.end();
794
795 // go through the commands and find closing endif or else or elseif
796 for (TokenListCIt it = m_tokenListIt; it != tokList.end(); it++) {
797 if (it->m_tokenType == COMMAND) {
798 if (it->m_tokenString == "if" || it->m_tokenString == "ifnot")
799 nestedCounter++;
800 else if (it->m_tokenString == "endif")
801 nestedCounter--;
802 if (nestedCounter == 1 && (it->m_tokenString == "else" || it->m_tokenString == "elseif")) { // else found
803 endCommand = it;
804 break;
805 }
806 if (nestedCounter == 0) { // endif found
807 endCommand = it;
808 skipEndif = true;
809 break;
810 }
811 }
812 }
813
814 if (endCommand == tokList.end()) {
815 printListError(WARN_DOXYGEN_COMMAND_EXPECTED, "Expected Doxygen command: endif.");
816 return;
817 }
818
819 DoxygenEntityList aNewList;
820 aNewList = parse(endCommand, tokList);
821 if (skipEndif)
822 m_tokenListIt++;
823 if (needsCond)
824 aNewList.push_front(DoxygenEntity("plainstd::string", cond));
825 doxyList.push_back(DoxygenEntity(theCommand, aNewList));
826 }
827 }
828
aliasCommand(const std::string & theCommand,const TokenList &,DoxygenEntityList & doxyList)829 void DoxygenParser::aliasCommand(const std::string &theCommand, const TokenList &/* tokList */ , DoxygenEntityList &doxyList) {
830 String *const alias = Getattr(m_node, ("feature:doxygen:alias:" + theCommand).c_str());
831 if (!alias)
832 return;
833
834 doxyList.push_back(DoxygenEntity("plainstd::string", Char(alias)));
835 }
836
getIgnoreFeature(const std::string & theCommand,const char * argument) const837 String *DoxygenParser::getIgnoreFeature(const std::string &theCommand, const char *argument) const {
838 string feature_name = "feature:doxygen:ignore:" + theCommand;
839 if (argument) {
840 feature_name += ':';
841 feature_name += argument;
842 }
843
844 return Getattr(m_node, feature_name.c_str());
845 }
846
getIgnoreFeatureEndCommand(const std::string & theCommand) const847 string DoxygenParser::getIgnoreFeatureEndCommand(const std::string &theCommand) const {
848 // We may be dealing either with a simple command or with the starting command
849 // of a block, as indicated by the value of "range" starting with "end".
850 string endCommand;
851 if (String *const range = getIgnoreFeature(theCommand, "range")) {
852 const char *const p = Char(range);
853 if (strncmp(p, "end", 3) == 0) {
854 if (p[3] == ':') {
855 // Normally the end command name follows after the colon.
856 endCommand = p + 4;
857 } else if (p[3] == '\0') {
858 // But it may be omitted in which case the default Doxygen convention of
859 // using "something"/"endsomething" is used.
860 endCommand = "end" + theCommand;
861 }
862 }
863 }
864
865 return endCommand;
866 }
867
ignoreCommand(const std::string & theCommand,const TokenList & tokList,DoxygenEntityList & doxyList)868 void DoxygenParser::ignoreCommand(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList) {
869 const string endCommand = getIgnoreFeatureEndCommand(theCommand);
870 if (!endCommand.empty()) {
871 TokenListCIt itEnd = getEndCommand(endCommand, tokList);
872 if (itEnd == tokList.end()) {
873 printListError(WARN_DOXYGEN_COMMAND_EXPECTED, "Expected Doxygen command: " + endCommand + ".");
874 return;
875 }
876 // If we ignore the command, also ignore any whitespace preceding it as we
877 // want to avoid having lines consisting of whitespace only or trailing
878 // whitespace in general (at least Python, with its pep8 tool, really
879 // doesn't like it).
880 if (!doxyList.empty()) {
881 DoxygenEntityList::iterator i = doxyList.end();
882 --i;
883 if (i->typeOfEntity == "plainstd::string" && i->data.find_first_not_of(" \t") == std::string::npos) {
884 doxyList.erase(i);
885 }
886 }
887 // Determine what to do with the part of the comment between the start and
888 // end commands: by default, we simply throw it away, but "contents"
889 // attribute may be used to change this.
890 if (String *const contents = getIgnoreFeature(theCommand, "contents")) {
891 // Currently only "parse" is supported but we may need to add "copy" to
892 // handle custom tags which contain text that is supposed to be copied
893 // verbatim in the future.
894 if (Strcmp(contents, "parse") == 0) {
895 DoxygenEntityList aNewList = parse(itEnd, tokList);
896 doxyList.splice(doxyList.end(), aNewList);
897 } else {
898 Swig_error(m_fileName.c_str(), m_fileLineNo, "Invalid \"doxygen:ignore\" feature \"contents\" attribute \"%s\".\n", Char(contents));
899 return;
900 }
901 }
902
903 m_tokenListIt = itEnd;
904 m_tokenListIt++;
905 } else if (String *const range = getIgnoreFeature(theCommand, "range")) {
906 // Currently we only support "line" but, in principle, we should also
907 // support "word" and "paragraph" for consistency with the built-in Doxygen
908 // commands which can have either of these three ranges (which are indicated
909 // using <word-arg>, (line-arg) and {para-arg} respectively in Doxygen
910 // documentation).
911 if (Strcmp(range, "line") == 0) {
912 // Consume everything until the end of line.
913 m_tokenListIt = getOneLine(tokList);
914 skipEndOfLine();
915 } else {
916 Swig_error(m_fileName.c_str(), m_fileLineNo, "Invalid \"doxygen:ignore\" feature \"range\" attribute \"%s\".\n", Char(range));
917 return;
918 }
919 }
920 }
921
addCommand(const std::string & commandString,const TokenList & tokList,DoxygenEntityList & doxyList)922 void DoxygenParser::addCommand(const std::string &commandString, const TokenList &tokList, DoxygenEntityList &doxyList) {
923
924 string theCommand = stringToLower(commandString);
925
926 if (theCommand == "plainstd::string") {
927 string nextPhrase = getStringTilCommand(tokList);
928 if (noisy)
929 cout << "Parsing plain std::string :" << nextPhrase << endl;
930 doxyList.push_back(DoxygenEntity("plainstd::string", nextPhrase));
931 return;
932 }
933
934 switch (commandBelongs(commandString)) {
935 case SIMPLECOMMAND:
936 addSimpleCommand(theCommand, doxyList);
937 break;
938 case COMMANDWORD:
939 addCommandWord(theCommand, tokList, doxyList);
940 break;
941 case COMMANDLINE:
942 addCommandLine(theCommand, tokList, doxyList);
943 break;
944 case COMMANDPARAGRAPH:
945 addCommandParagraph(theCommand, tokList, doxyList);
946 break;
947 case COMMANDENDCOMMAND:
948 addCommandEndCommand(theCommand, tokList, doxyList);
949 break;
950 case COMMANDWORDPARAGRAPH:
951 addCommandWordParagraph(theCommand, tokList, doxyList);
952 break;
953 case COMMANDWORDLINE:
954 addCommandWordLine(theCommand, tokList, doxyList);
955 break;
956 case COMMANDWORDOWORDWORD:
957 addCommandWordOWordOWord(theCommand, tokList, doxyList);
958 break;
959 case COMMANDOWORD:
960 addCommandOWord(theCommand, tokList, doxyList);
961 break;
962 case COMMANDERRORTHROW:
963 addCommandErrorThrow(theCommand, tokList, doxyList);
964 break;
965 case COMMANDUNIQUE:
966 addCommandUnique(theCommand, tokList, doxyList);
967 break;
968 case COMMAND_HTML:
969 addCommandHtml(theCommand, tokList, doxyList);
970 break;
971 case COMMAND_HTML_ENTITY:
972 addCommandHtmlEntity(theCommand, tokList, doxyList);
973 break;
974 case COMMAND_ALIAS:
975 aliasCommand(commandString, tokList, doxyList);
976 break;
977 case COMMAND_IGNORE:
978 ignoreCommand(commandString, tokList, doxyList);
979 break;
980 case NONE:
981 case END_LINE:
982 case PARAGRAPH_END:
983 case PLAINSTRING:
984 case COMMAND:
985 // TODO: Ensure that these values either are correctly ignored here or can't happen.
986 break;
987 }
988 }
989
990 /**
991 * This method converts TokenList to DoxygenEntryList.
992 */
parse(TokenListCIt endParsingIndex,const TokenList & tokList,bool root)993 DoxygenEntityList DoxygenParser::parse(TokenListCIt endParsingIndex, const TokenList &tokList, bool root) {
994 // if we are root, than any strings should be added as 'partofdescription', else as 'plainstd::string'
995 std::string currPlainstringCommandType = root ? "partofdescription" : "plainstd::string";
996 DoxygenEntityList aNewList;
997
998 // Less than check (instead of not equal) is a safeguard in case the
999 // iterator is incremented past the end
1000 while (m_tokenListIt < endParsingIndex) {
1001
1002 Token currToken = *m_tokenListIt;
1003
1004 if (noisy)
1005 cout << "Parsing for phrase starting in:" << currToken.toString() << endl;
1006
1007 if (currToken.m_tokenType == END_LINE) {
1008 aNewList.push_back(DoxygenEntity("plainstd::endl"));
1009 m_tokenListIt++;
1010 } else if (currToken.m_tokenType == COMMAND) {
1011 m_tokenListIt++;
1012 addCommand(currToken.m_tokenString, tokList, aNewList);
1013 } else if (currToken.m_tokenType == PLAINSTRING) {
1014 addCommand(currPlainstringCommandType, tokList, aNewList);
1015 }
1016
1017 // If addCommand above misbehaves, it can move the iterator past endParsingIndex
1018 if (m_tokenListIt > endParsingIndex)
1019 printListError(WARN_DOXYGEN_UNEXPECTED_ITERATOR_VALUE, "Unexpected iterator value in DoxygenParser::parse");
1020
1021 if (endParsingIndex != tokList.end() && m_tokenListIt == tokList.end()) {
1022 // this could happen if we can't reach the original endParsingIndex
1023 printListError(WARN_DOXYGEN_UNEXPECTED_END_OF_COMMENT, "Unexpected end of Doxygen comment encountered.");
1024 break;
1025 }
1026 }
1027 return aNewList;
1028 }
1029
createTree(Node * node,String * documentation)1030 DoxygenEntityList DoxygenParser::createTree(Node *node, String *documentation) {
1031 m_node = node;
1032
1033 tokenizeDoxygenComment(Char(documentation), Char(Getfile(documentation)), Getline(documentation));
1034
1035 if (noisy) {
1036 cout << "---TOKEN LIST---" << endl;
1037 printList();
1038 }
1039
1040 DoxygenEntityList rootList = parse(m_tokenList.end(), m_tokenList, true);
1041
1042 if (noisy) {
1043 cout << "PARSED LIST" << endl;
1044 printTree(rootList);
1045 }
1046 return rootList;
1047 }
1048
1049 /*
1050 * Splits 'text' on 'separator' chars. Separator chars are not part of the
1051 * strings.
1052 */
split(const std::string & text,char separator)1053 DoxygenParser::StringVector DoxygenParser::split(const std::string &text, char separator) {
1054 StringVector lines;
1055 size_t prevPos = 0, pos = 0;
1056
1057 while (pos < string::npos) {
1058 pos = text.find(separator, prevPos);
1059 lines.push_back(text.substr(prevPos, pos - prevPos));
1060 prevPos = pos + 1;
1061 }
1062
1063 return lines;
1064 }
1065
1066 /*
1067 * Returns true, if 'c' is one of doxygen comment block start
1068 * characters: *, /, or !
1069 */
isStartOfDoxyCommentChar(char c)1070 bool DoxygenParser::isStartOfDoxyCommentChar(char c) {
1071 return (strchr("*/!", c) != NULL);
1072 }
1073
1074 /*
1075 * Adds token with Doxygen command to token list, but only if command is one of
1076 * Doxygen commands. In that case true is returned. If the command is not
1077 * recognized as a doxygen command, it is ignored and false is returned.
1078 */
addDoxyCommand(DoxygenParser::TokenList & tokList,const std::string & cmd)1079 bool DoxygenParser::addDoxyCommand(DoxygenParser::TokenList &tokList, const std::string &cmd) {
1080 if (commandBelongs(cmd) != NONE) {
1081 tokList.push_back(Token(COMMAND, cmd));
1082 return true;
1083 } else {
1084 // This function is called for the special Doxygen commands, but also for
1085 // HTML commands (or anything that looks like them, actually) and entities.
1086 // We don't recognize all of those, so just ignore them and pass them
1087 // through, but warn about unknown Doxygen commands as ignoring them will
1088 // often result in wrong output being generated.
1089 const char ch = *cmd.begin();
1090 if (ch != '<' && ch != '&') {
1091 // Before calling printListError() we must ensure that m_tokenListIt used
1092 // by it is valid.
1093 const TokenListCIt itSave = m_tokenListIt;
1094 m_tokenListIt = m_tokenList.end();
1095
1096 printListError(WARN_DOXYGEN_UNKNOWN_COMMAND, "Unknown Doxygen command: " + cmd + ".");
1097
1098 m_tokenListIt = itSave;
1099 }
1100 }
1101
1102 return false;
1103 }
1104
1105 /*
1106 * This method copies comment text to output as it is - no processing is
1107 * done, Doxygen commands are ignored. It is used for commands \verbatim,
1108 * \htmlonly, \f$, \f[, and \f{.
1109 */
processVerbatimText(size_t pos,const std::string & line)1110 size_t DoxygenParser::processVerbatimText(size_t pos, const std::string &line) {
1111 if (line[pos] == '\\' || line[pos] == '@') { // check for end commands
1112
1113 pos++;
1114 size_t endOfWordPos = line.find_first_not_of(DOXYGEN_WORD_CHARS, pos);
1115 string cmd = line.substr(pos, endOfWordPos - pos);
1116
1117 if (cmd == CMD_END_HTML_ONLY || cmd == CMD_END_VERBATIM || cmd == CMD_END_LATEX_1 || cmd == CMD_END_LATEX_2 || cmd == CMD_END_LATEX_3 || cmd == CMD_END_CODE) {
1118
1119 m_isVerbatimText = false;
1120 addDoxyCommand(m_tokenList, cmd);
1121
1122 } else {
1123
1124 m_tokenList.push_back(Token(PLAINSTRING,
1125 // include '\' or '@'
1126 line.substr(pos - 1, endOfWordPos - pos + 1)));
1127 }
1128
1129 pos = endOfWordPos;
1130
1131 } else {
1132
1133 // whitespaces are stored as plain strings
1134 size_t startOfPossibleEndCmd = line.find_first_of("\\@", pos);
1135 m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, startOfPossibleEndCmd - pos)));
1136 pos = startOfPossibleEndCmd;
1137 }
1138
1139 return pos;
1140 }
1141
1142 /*
1143 * Processes doxy commands for escaped characters: \$ \@ \\ \& \~ \< \> \# \% \" \. \::
1144 * Handling this separately supports documentation text like \@someText.
1145 */
processEscapedChars(size_t & pos,const std::string & line)1146 bool DoxygenParser::processEscapedChars(size_t &pos, const std::string &line) {
1147 if ((pos + 1) < line.size()) {
1148
1149 // \ and @ with trailing whitespace or quoted get to output as plain string
1150 string whitespaces = " '\t\n";
1151 if (whitespaces.find(line[pos + 1]) != string::npos) {
1152 m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, 1)));
1153 pos++;
1154 return true;
1155 }
1156 // these chars can be escaped for doxygen
1157 string escapedChars = "$@\\&~<>#%\".";
1158 if (escapedChars.find(line[pos + 1]) != string::npos) {
1159
1160 addDoxyCommand(m_tokenList, line.substr(pos + 1, 1));
1161 pos += 2;
1162 return true;
1163
1164 } else if ((pos + 2) < line.size() && line[pos + 1] == ':' && line[pos + 2] == ':') {
1165
1166 // add command \:: - handling this separately supports documentation
1167 // text like \::someText
1168 addDoxyCommand(m_tokenList, line.substr(pos + 1, 2));
1169 pos += 3;
1170 return true;
1171 }
1172 }
1173 return false;
1174 }
1175
1176 /*
1177 * Processes word doxygen commands, like \arg, \c, \b, \return, ...
1178 */
processWordCommands(size_t & pos,const std::string & line)1179 void DoxygenParser::processWordCommands(size_t &pos, const std::string &line) {
1180 pos++;
1181 size_t endOfWordPos = getEndOfWordCommand(line, pos);
1182
1183 string cmd = line.substr(pos, endOfWordPos - pos);
1184 addDoxyCommand(m_tokenList, cmd);
1185
1186 // A flag for whether we want to skip leading spaces after the command
1187 bool skipLeadingSpace = true;
1188
1189 if (cmd == CMD_HTML_ONLY || cmd == CMD_VERBATIM || cmd == CMD_LATEX_1 || cmd == CMD_LATEX_2 || cmd == CMD_LATEX_3 || getBaseCommand(cmd) == CMD_CODE) {
1190
1191 m_isVerbatimText = true;
1192
1193 // Skipping leading space is necessary with inline \code command,
1194 // and it won't hurt anything for block \code (TODO: are the other
1195 // commands also compatible with skip leading space? If so, just
1196 // do it every time.)
1197 if (getBaseCommand(cmd) == CMD_CODE) skipLeadingSpace = true;
1198 else skipLeadingSpace = false;
1199 } else if (cmd.substr(0,3) == "end") {
1200 // If processing an "end" command such as "endlink", don't skip
1201 // the space before the next string
1202 skipLeadingSpace = false;
1203 }
1204
1205 if (skipLeadingSpace) {
1206 // skip any possible spaces after command, because some commands have parameters,
1207 // and spaces between command and parameter must be ignored.
1208 if (endOfWordPos != string::npos) {
1209 endOfWordPos = line.find_first_not_of(" \t", endOfWordPos);
1210 }
1211 }
1212
1213 pos = endOfWordPos;
1214 }
1215
processHtmlTags(size_t & pos,const std::string & line)1216 void DoxygenParser::processHtmlTags(size_t &pos, const std::string &line) {
1217 bool isEndHtmlTag = false;
1218 pos++;
1219 if (line.size() > pos && line[pos] == '/') {
1220 isEndHtmlTag = true;
1221 pos++;
1222 }
1223
1224 size_t endHtmlPos = line.find_first_of("\t >", pos);
1225
1226 string cmd = line.substr(pos, endHtmlPos - pos);
1227 pos = endHtmlPos;
1228
1229 // prepend '<' to distinguish HTML tags from doxygen commands
1230 if (!cmd.empty() && addDoxyCommand(m_tokenList, '<' + cmd)) {
1231 // it is a valid HTML command
1232 if (line[pos] != '>') {
1233 // it should be HTML tag with args,
1234 // for example <A ...>, <IMG ...>, ...
1235 if (isEndHtmlTag) {
1236 m_tokenListIt = m_tokenList.end();
1237 printListError(WARN_DOXYGEN_HTML_ERROR, "Doxygen HTML error for tag " + cmd + ": Illegal end HTML tag without '>' found.");
1238 }
1239
1240 endHtmlPos = line.find(">", pos);
1241 if (endHtmlPos == string::npos) {
1242 m_tokenListIt = m_tokenList.end();
1243 printListError(WARN_DOXYGEN_HTML_ERROR, "Doxygen HTML error for tag " + cmd + ": HTML tag without '>' found.");
1244 }
1245 // add args of HTML command, like link URL, image URL, ...
1246 m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, endHtmlPos - pos)));
1247 pos = endHtmlPos;
1248 } else {
1249 if (isEndHtmlTag) {
1250 m_tokenList.push_back(Token(PLAINSTRING, END_HTML_TAG_MARK));
1251 } else {
1252 // it is a simple tag, so push empty string
1253 m_tokenList.push_back(Token(PLAINSTRING, ""));
1254 }
1255 }
1256
1257 if (pos != string::npos) {
1258 pos++; // skip '>'
1259 }
1260 } else {
1261 // the command is not HTML supported by Doxygen, < and > will be
1262 // replaced by HTML entities < and > respectively,
1263 addDoxyCommand(m_tokenList, "<");
1264 m_tokenList.push_back(Token(PLAINSTRING, cmd));
1265 }
1266 }
1267
processHtmlEntities(size_t & pos,const std::string & line)1268 void DoxygenParser::processHtmlEntities(size_t &pos, const std::string &line) {
1269 size_t endOfWordPos = line.find_first_not_of("abcdefghijklmnopqrstuvwxyz", pos + 1);
1270
1271 if (endOfWordPos != string::npos) {
1272
1273 if (line[endOfWordPos] == ';' && (endOfWordPos - pos) > 1) {
1274 // if entity is not recognized by Doxygen (not in the list of
1275 // commands) nothing is added (here and in Doxygen).
1276 addDoxyCommand(m_tokenList, line.substr(pos, endOfWordPos - pos));
1277 endOfWordPos++; // skip ';'
1278 } else {
1279 // it is not an entity - add entity for ampersand and the rest of string
1280 addDoxyCommand(m_tokenList, "&");
1281 m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos + 1, endOfWordPos - pos - 1)));
1282 }
1283 }
1284 pos = endOfWordPos;
1285 }
1286
1287 /*
1288 * This method processes normal comment, which has to be tokenized.
1289 */
processNormalComment(size_t pos,const std::string & line)1290 size_t DoxygenParser::processNormalComment(size_t pos, const std::string &line) {
1291 switch (line[pos]) {
1292 case '\\':
1293 case '@':
1294 if (processEscapedChars(pos, line)) {
1295 break;
1296 }
1297 // handle word commands \arg, \c, \return, ... and \f[, \f$, ... commands
1298 processWordCommands(pos, line);
1299 break;
1300
1301 case ' ': // whitespace
1302 case '\t':
1303 {
1304 // whitespaces are stored as plain strings
1305 size_t startOfNextWordPos = line.find_first_not_of(" \t", pos + 1);
1306 m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, startOfNextWordPos - pos)));
1307 pos = startOfNextWordPos;
1308 }
1309 break;
1310
1311 case '<':
1312 processHtmlTags(pos, line);
1313 break;
1314 case '>': // this char is detected here only when it is not part of HTML tag
1315 addDoxyCommand(m_tokenList, ">");
1316 pos++;
1317 break;
1318 case '&':
1319 processHtmlEntities(pos, line);
1320 break;
1321 case '"':
1322 m_isInQuotedString = true;
1323 m_tokenList.push_back(Token(PLAINSTRING, "\""));
1324 pos++;
1325 break;
1326 default:
1327 m_tokenListIt = m_tokenList.end();
1328 printListError(WARN_DOXYGEN_UNKNOWN_CHARACTER, std::string("Unknown special character in Doxygen comment: ") + line[pos] + ".");
1329 }
1330
1331 return pos;
1332 }
1333
/*
 * This is the main method, which tokenizes Doxygen comment to words and
 * doxygen commands.
 *
 * The tokenizer state (verbatim and quoted string flags, token list, file
 * name and line number used for warnings) is reset first. The comment is
 * then split into lines and each line is converted to PLAINSTRING and
 * COMMAND tokens followed by one END_LINE token. On return m_tokenListIt
 * points to the first token.
 *
 * doxygenComment - text of the comment
 * fileName       - stored in m_fileName
 * fileLine       - stored in m_fileLineNo
 */
void DoxygenParser::tokenizeDoxygenComment(const std::string &doxygenComment, const std::string &fileName, int fileLine) {
  m_isVerbatimText = false;
  m_isInQuotedString = false;
  m_tokenList.clear();
  m_fileLineNo = fileLine;
  m_fileName = fileName;

  StringVector lines = split(doxygenComment, '\n');

  // remove trailing spaces, because they cause an additional new line at the
  // end of the comment, which is wrong, because these spaces are the spaces
  // preceding the end of comment: ' */'
  if (!doxygenComment.empty() && doxygenComment[doxygenComment.size() - 1] == ' ') {

    string lastLine = lines[lines.size() - 1];

    if (trim(lastLine).empty()) {
      lines.pop_back(); // remove trailing empty line
    }
  }

  for (StringVectorCIt it = lines.begin(); it != lines.end(); it++) {
    const string &line = *it;
    size_t pos = line.find_first_not_of(" \t");

    // a blank line results in the end of line token only
    if (pos == string::npos) {
      m_tokenList.push_back(Token(END_LINE, "\n"));
      continue;
    }
    // skip sequences of '*', '/', and '!' of any length
    bool isStartOfCommentLineCharFound = false;
    while (pos < line.size() && isStartOfDoxyCommentChar(line[pos])) {
      pos++;
      isStartOfCommentLineCharFound = true;
    }

    // nothing but comment start characters in the line
    if (pos == line.size()) {
      m_tokenList.push_back(Token(END_LINE, "\n"));
      continue;
    }
    // if 'isStartOfCommentLineCharFound' then skip only the single space
    // following the comment start characters, so that any further leading
    // spaces are preserved (they are added as a plain string token below).
    // This is important to keep formatting for comments translated to Python.
    if (isStartOfCommentLineCharFound && line[pos] == ' ') {
      pos++; // points to char after ' * '
      if (pos == line.size()) {
        m_tokenList.push_back(Token(END_LINE, "\n"));
        continue;
      }
    }
    // line[pos] may be ' ', '\t' or start of word, if there was no '*', '/'
    // or '!' at the beginning of the line. Make sure it points to start of
    // the first word in the line.
    if (isStartOfCommentLineCharFound) {
      size_t firstWordPos = line.find_first_not_of(" \t", pos);
      if (firstWordPos == string::npos) {
        m_tokenList.push_back(Token(END_LINE, "\n"));
        continue;
      }

      // keep the remaining indentation as a token of its own
      if (firstWordPos > pos) {
        m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, firstWordPos - pos)));
        pos = firstWordPos;
      }
    } else {
      // no comment start characters: pos still points to the first
      // non-blank character, so this is the indentation of the line
      m_tokenList.push_back(Token(PLAINSTRING, line.substr(0, pos)));
    }

    // pos becomes npos once the whole line has been consumed
    while (pos != string::npos) {
      // find the end of the word
      size_t doxyCmdOrHtmlTagPos = line.find_first_of("\\@<>&\" \t", pos);
      if (doxyCmdOrHtmlTagPos != pos) {
        // plain text found
        // if the last char is punctuation, make it a separate word, otherwise
        // it may be included with word also when not appropriate, for example:
        // colors are \b red, green, and blue --> colors are <b>red,</b> green, and blue
        // instead of (comma not bold):
        // colors are \b red, green, and blue --> colors are <b>red</b>, green, and blue
        // In Python it looks even worse:
        // colors are \b red, green, and blue --> colors are 'red,' green, and blue
        string text = line.substr(pos, doxyCmdOrHtmlTagPos - pos);
        string punctuations(".,:");
        size_t textSize = text.size();

        if (!text.empty()
            && punctuations.find(text[text.size() - 1]) != string::npos &&
            // but do not break ellipsis (...)
            !(textSize > 1 && text[textSize - 2] == '.')) {
          m_tokenList.push_back(Token(PLAINSTRING, text.substr(0, text.size() - 1)));
          m_tokenList.push_back(Token(PLAINSTRING, text.substr(text.size() - 1)));
        } else {
          m_tokenList.push_back(Token(PLAINSTRING, text));
        }
      }

      pos = doxyCmdOrHtmlTagPos;
      if (pos != string::npos) {
        // line[pos] is one of the special characters searched for above; how
        // it is handled depends on the current tokenizer mode
        if (m_isVerbatimText) {
          pos = processVerbatimText(pos, line);

        } else if (m_isInQuotedString) {

          // inside a quoted string special characters are plain text, only
          // the closing quote is significant
          if (line[pos] == '"') {
            m_isInQuotedString = false;
          }
          m_tokenList.push_back(Token(PLAINSTRING, line.substr(pos, 1)));
          pos++;

        } else {
          pos = processNormalComment(pos, line);
        }
      }
    }
    m_tokenList.push_back(Token(END_LINE, "\n")); // add when pos == npos - end of line
  }

  m_tokenListIt = m_tokenList.begin();
}
1456
printList()1457 void DoxygenParser::printList() {
1458
1459 int tokNo = 0;
1460 for (TokenListCIt it = m_tokenList.begin(); it != m_tokenList.end(); it++, tokNo++) {
1461
1462 cout << it->toString() << " ";
1463
1464 if ((tokNo % TOKENSPERLINE) == 0) {
1465 cout << endl;
1466 }
1467 }
1468 }
1469
printListError(int warningType,const std::string & message)1470 void DoxygenParser::printListError(int warningType, const std::string &message) {
1471 int curLine = m_fileLineNo;
1472 for (TokenListCIt it = m_tokenList.begin(); it != m_tokenListIt; it++) {
1473 if (it->m_tokenType == END_LINE) {
1474 curLine++;
1475 }
1476 }
1477
1478 Swig_warning(warningType, m_fileName.c_str(), curLine, "%s\n", message.c_str());
1479 }
1480