1 /*
2 * TexLogParser.cpp
3 *
4 * Copyright (C) 2021 by RStudio, PBC
5 *
6 * Unless you have received this program directly from RStudio pursuant
7 * to the terms of a commercial license agreement with RStudio, then
8 * this program is licensed to you under the terms of version 3 of the
9 * GNU Affero General Public License. This program is distributed WITHOUT
10 * ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
11 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
12 * AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
13 *
14 */
15
16 #include <core/tex/TexLogParser.hpp>
17
18 #include <gsl/gsl>
19
20 #include <boost/regex.hpp>
21 #include <boost/lexical_cast.hpp>
22 #include <boost/algorithm/string.hpp>
23
24 #include <shared_core/Error.hpp>
25 #include <shared_core/FilePath.hpp>
26 #include <core/FileSerializer.hpp>
27 #include <core/RegexUtils.hpp>
28 #include <shared_core/SafeConvert.hpp>
29 #include <core/system/System.hpp>
30
31 namespace rstudio {
32 namespace core {
33 namespace tex {
34
35 namespace {
36
37 // Helper function, returns true if str begins with any of these values
beginsWith(const std::string & str,const std::string & test1,const std::string & test2=std::string (),const std::string & test3=std::string (),const std::string & test4=std::string ())38 bool beginsWith(const std::string& str,
39 const std::string& test1,
40 const std::string& test2=std::string(),
41 const std::string& test3=std::string(),
42 const std::string& test4=std::string())
43 {
44 using namespace boost::algorithm;
45 if (starts_with(str, test1))
46 return true;
47
48 if (test2.empty())
49 return false;
50 else if (starts_with(str, test2))
51 return true;
52
53 if (test3.empty())
54 return false;
55 else if (starts_with(str, test3))
56 return true;
57
58 if (test4.empty())
59 return false;
60 else if (starts_with(str, test4))
61 return true;
62
63 return false;
64 }
65
// Appends to *pParens an iterator for every parenthesis in `line` that has no
// partner on the same line. Entries may point at either '(' or ')'. For an
// initially empty *pParens the result is always [zero or more ')'] followed
// by [zero or more '('].
void findUnmatchedParens(const std::string& line,
                         std::vector<std::string::const_iterator>* pParens)
{
   // A ')' is only recorded as unmatched while we are still inside the line's
   // leading run of whitespace and close parens. Once anything else has been
   // seen, stray close parens are dropped. This avoids false positives from
   // the continuation of math error context such as:
   //
   //   l.204 (x + (y^
   //                 2))
   //
   // where the first line is skipped as part of an error message but the
   // second would otherwise be parsed and underflow the file stack.
   //
   // NOTE: it is unclear whether "(<filename>" can appear anywhere other than
   // the start of a line (after whitespace/close parens); the Sublime Text 2
   // plugin code suggests it can, so open parens are accepted anywhere.
   bool sawOtherContent = false;

   for (std::string::const_iterator pos = line.begin(); pos != line.end(); ++pos)
   {
      const char ch = *pos;

      // Whitespace never changes the state
      if (ch == ' ' || ch == '\t')
         continue;

      if (ch != ')')
      {
         // Open parens are always recorded; any non-space character (open
         // paren included) ends the leading close-paren run
         if (ch == '(')
            pParens->push_back(pos);
         sawOtherContent = true;
         continue;
      }

      // ch == ')': either it closes the most recent pending '(' ...
      const bool closesPendingOpen =
            !pParens->empty() && *(pParens->back()) != ')';
      if (closesPendingOpen)
         pParens->pop_back();
      // ... or it is unmatched, which only counts at the start of the line
      else if (!sawOtherContent)
         pParens->push_back(pos);
   }
}
114
// Turns a filename as written in a TeX log into a FilePath.
//
// The name is unquoted if it is wrapped in double quotes, a leading "./" is
// dropped, and the remainder is completed against `rootDir`. Returns an empty
// FilePath when the name is empty after cleanup, or when the completed path
// does not exist or refers to a directory.
FilePath resolveFilename(const FilePath& rootDir,
                         const std::string& filename)
{
   std::string result = filename;

   // Remove surrounding quotes if necessary. Both the closing AND the opening
   // quote must go: leaving the leading quote in place would produce a path
   // that can never match a file on disk.
   if (result.size() > 2 &&
       boost::algorithm::starts_with(result, "\"") &&
       boost::algorithm::ends_with(result, "\""))
   {
      result.erase(result.size()-1, 1);
      result.erase(0, 1);
   }

   // Strip leading ./
   if (boost::algorithm::starts_with(result, "./"))
      result.erase(0, 2);

   if (result.empty())
      return FilePath();

   // Only report files that actually exist (and are not directories)
   FilePath file = rootDir.completePath(result);
   if (file.exists() && !file.isDirectory())
      return file;
   else
      return FilePath();
}
142
143 // TeX wraps lines hard at 79 characters. We use heuristics as described in
144 // Sublime Text's TeX plugin to determine where these breaks are.
unwrapLines(std::vector<std::string> * pLines,std::vector<size_t> * pLinesUnwrapped=nullptr)145 void unwrapLines(std::vector<std::string>* pLines,
146 std::vector<size_t>* pLinesUnwrapped=nullptr)
147 {
148 static boost::regex regexLine("^l\\.(\\d+)\\s");
149 static boost::regex regexAssignment("^\\\\.*?=");
150
151 std::vector<std::string>::iterator pos = pLines->begin();
152
153 for ( ; pos != pLines->end(); pos++)
154 {
155 // The first line is always long, and not artificially wrapped
156 if (pos == pLines->begin())
157 continue;
158
159 if (pos->length() != 79)
160 continue;
161
162 // The **<filename> line may be long, but we don't care about it
163 if (beginsWith(*pos, "**"))
164 continue;
165
166 while (true)
167 {
168 std::vector<std::string>::iterator nextPos = pos + 1;
169 // No more lines to add
170 if (nextPos == pLines->end())
171 break;
172
173 if (nextPos->empty())
174 break;
175
176 // Underfull/Overfull terminator
177 if (*nextPos == " []")
178 break;
179
180 // Common prefixes
181 if (beginsWith(*nextPos, "File:", "Package:", "Document Class:"))
182 break;
183
184 // More prefixes
185 if (beginsWith(*nextPos, "LaTeX Warning:", "LaTeX Info:", "LaTeX2e <"))
186 break;
187
188 if (regex_utils::search(*nextPos, regexAssignment))
189 break;
190
191 if (regex_utils::search(*nextPos, regexLine))
192 break;
193
194 bool breakAfterAppend = nextPos->length() != 79;
195
196 pos->append(*nextPos);
197 // NOTE: Erase is a simple but inefficient way of handling this. Would
198 // be way faster to maintain an output iterator that points to the
199 // correct point in pLines, and when finished, truncate whatever
200 // elements come after the final position of the output iterator.
201 pLines->erase(nextPos, nextPos+1);
202 if (pLinesUnwrapped)
203 pLinesUnwrapped->push_back(1 + (pos - pLines->begin()));
204
205 if (breakAfterAppend)
206 break;
207 }
208 }
209 }
210
211 class FileStack : public boost::noncopyable
212 {
213 public:
FileStack(FilePath rootDir)214 explicit FileStack(FilePath rootDir) : rootDir_(rootDir)
215 {
216 }
217
currentFile()218 FilePath currentFile()
219 {
220 return currentFile_;
221 }
222
processLine(const std::string & line)223 void processLine(const std::string& line)
224 {
225 typedef std::vector<std::string::const_iterator> Iterators;
226 Iterators parens;
227 findUnmatchedParens(line, &parens);
228 for (Iterators::const_iterator itParen = parens.begin();
229 itParen != parens.end();
230 itParen++)
231 {
232 std::string::const_iterator it = *itParen;
233
234 if (*it == ')')
235 {
236 if (!fileStack_.empty())
237 {
238 fileStack_.pop_back();
239 updateCurrentFile();
240 }
241 else
242 {
243 LOG_WARNING_MESSAGE("File context stack underflow while parsing "
244 "TeX log");
245 }
246 }
247 else if (*it == '(')
248 {
249 std::string::const_iterator itFilenameEnd =
250 // case: no other ( on this line
251 (itParen + 1 == parens.end()) ? line.end() :
252 // case: space before next paren, eat it
253 *(*(itParen+1)-1) == ' ' ? *(itParen+1)-1 :
254 // case: other
255 *(itParen+1);
256
257 std::string filename = std::string(it+1, itFilenameEnd);
258 fileStack_.push_back(resolveFilename(rootDir_, filename));
259
260 updateCurrentFile();
261 }
262 else
263 BOOST_ASSERT(false);
264 }
265 }
266
267 private:
268
updateCurrentFile()269 void updateCurrentFile()
270 {
271 for (std::vector<FilePath>::reverse_iterator it = fileStack_.rbegin();
272 it != fileStack_.rend();
273 it++)
274 {
275 if (!it->isEmpty())
276 {
277 currentFile_ = *it;
278 return;
279 }
280 }
281 currentFile_ = FilePath();
282 }
283
284 FilePath rootDir_;
285 FilePath currentFile_;
286 std::vector<FilePath> fileStack_;
287 };
288
// Builds the FilePath for a file named in a log, relative to `compileDir`.
//
// Some TeX compilers report absolute paths and others report paths relative
// to the compilation directory, so the completed path is passed through
// core::system::realPath to get a clean full path. If that fails the
// completed (unresolved) path is returned instead; the failure is logged
// unless it is simply "no such file or directory".
FilePath texFilePath(const std::string& logPath, const FilePath& compileDir)
{
   FilePath completed = compileDir.completePath(logPath);

   FilePath resolved;
   Error error = core::system::realPath(completed, &resolved);
   if (!error)
      return resolved;

   // A missing file is an expected outcome and not worth logging
   if (error != systemError(boost::system::errc::no_such_file_or_directory, ErrorLocation()))
   {
      LOG_ERROR(error);
   }

   return completed;
}
313
// Maps a line number in the unwrapped line list back to a line number in the
// original (still wrapped) log file.
//
// `unwrappedLines` holds one entry per merged-away line, as filled in by
// unwrapLines. The result is `unwrappedLineNum` plus the number of leading
// entries that are strictly smaller than it (counting stops at the first
// entry that is >= unwrappedLineNum).
size_t calculateWrappedLine(const std::vector<size_t>& unwrappedLines,
                            size_t unwrappedLineNum)
{
   size_t mergedBefore = 0;
   while (mergedBefore < unwrappedLines.size() &&
          unwrappedLines[mergedBefore] < unwrappedLineNum)
   {
      ++mergedBefore;
   }

   return unwrappedLineNum + mergedBefore;
}
329
330 } // anonymous namespace
331
parseLatexLog(const FilePath & logFilePath,LogEntries * pLogEntries)332 Error parseLatexLog(const FilePath& logFilePath, LogEntries* pLogEntries)
333 {
334 static boost::regex regexOverUnderfullLines(" at lines (\\d+)--(\\d+)\\s*(?:\\[])?$");
335 static boost::regex regexWarning("^(?:.*?) Warning: (.+)");
336 static boost::regex regexWarningEnd(" input line (\\d+)\\.$");
337 static boost::regex regexLnn("^l\\.(\\d+)\\s");
338 static boost::regex regexCStyleError("^(.+):(\\d+):\\s(.+)$");
339
340 std::vector<std::string> lines;
341 Error error = readStringVectorFromFile(logFilePath, &lines, false);
342 if (error)
343 return error;
344
345 std::vector<size_t> linesUnwrapped;
346 unwrapLines(&lines, &linesUnwrapped);
347
348 FilePath rootDir = logFilePath.getParent();
349 FileStack fileStack(rootDir);
350
351 for (std::vector<std::string>::const_iterator it = lines.begin();
352 it != lines.end();
353 it++)
354 {
355 const std::string& line = *it;
356 auto logLineNum = (it - lines.begin()) + 1;
357
358 // We slurp overfull/underfull messages with no further processing
359 // (i.e. not manipulating the file stack)
360
361 if (beginsWith(line, "Overfull ", "Underfull "))
362 {
363 std::string msg = line;
364 int lineNum = -1;
365
366 // Parse lines, if present
367 boost::smatch overUnderfullLinesMatch;
368 if (regex_utils::search(line,
369 overUnderfullLinesMatch,
370 regexOverUnderfullLines))
371 {
372 lineNum = safe_convert::stringTo<int>(overUnderfullLinesMatch[1],
373 -1);
374 }
375
376 // Single line case
377 bool singleLine = boost::algorithm::ends_with(line, "[]");
378
379 if (singleLine)
380 {
381 msg.erase(line.size()-2, 2);
382 boost::algorithm::trim_right(msg);
383 }
384
385 pLogEntries->push_back(LogEntry(logFilePath,
386 gsl::narrow_cast<int>(calculateWrappedLine(
387 linesUnwrapped,
388 logLineNum)),
389 LogEntry::Box,
390 fileStack.currentFile(),
391 lineNum,
392 msg));
393
394 if (singleLine)
395 continue;
396
397 for (; it != lines.end(); it++)
398 {
399 // For multi-line case, we're looking for " []" on a line by itself
400 if (*it == " []")
401 break;
402 }
403
404 // The iterator would be incremented by the outer for loop, must not
405 // let it go past the end! (If we did get to the end, it would
406 // mean the log file was malformed, but we still can't crash in this
407 // situation.)
408 if (it == lines.end())
409 break;
410 else
411 continue;
412 }
413
414 fileStack.processLine(line);
415
416 // Now see if it's an error or warning
417
418 if (beginsWith(line, "! "))
419 {
420 std::string errorMsg = line.substr(2);
421 int lineNum = -1;
422
423 boost::smatch match;
424 for (it++; it != lines.end(); it++)
425 {
426 if (regex_utils::search(*it, match, regexLnn))
427 {
428 lineNum = safe_convert::stringTo<int>(match[1], -1);
429 break;
430 }
431 }
432
433 pLogEntries->push_back(LogEntry(logFilePath,
434 gsl::narrow_cast<int>(calculateWrappedLine(
435 linesUnwrapped,
436 logLineNum)),
437 LogEntry::Error,
438 fileStack.currentFile(),
439 lineNum,
440 errorMsg));
441
442 // The iterator would be incremented by the outer for loop, must not
443 // let it go past the end! (If we did get to the end, it would
444 // mean the log file was malformed, but we still can't crash in this
445 // situation.)
446 if (it == lines.end())
447 break;
448 else
449 continue;
450 }
451
452 boost::smatch warningMatch;
453 if (regex_utils::search(line, warningMatch, regexWarning))
454 {
455 std::string warningMsg = warningMatch[1];
456 int lineNum = -1;
457 while (true)
458 {
459 if (boost::algorithm::ends_with(warningMsg, "."))
460 {
461 boost::smatch warningEndMatch;
462 if (regex_utils::search(*it, warningEndMatch, regexWarningEnd))
463 {
464 lineNum = safe_convert::stringTo<int>(warningEndMatch[1], -1);
465 }
466 break;
467 }
468
469 if (++it == lines.end())
470 break;
471 warningMsg.append(*it);
472 }
473
474 pLogEntries->push_back(LogEntry(logFilePath,
475 gsl::narrow_cast<int>(calculateWrappedLine(
476 linesUnwrapped,
477 logLineNum)),
478 LogEntry::Warning,
479 fileStack.currentFile(),
480 lineNum,
481 warningMsg));
482
483 // The iterator would be incremented by the outer for loop, must not
484 // let it go past the end! (If we did get to the end, it would
485 // mean the log file was malformed, but we still can't crash in this
486 // situation.)
487 if (it == lines.end())
488 break;
489 else
490 continue;
491 }
492
493 boost::smatch cStyleErrorMatch;
494 if (regex_utils::search(line, cStyleErrorMatch, regexCStyleError))
495 {
496 FilePath cstyleFile = resolveFilename(rootDir, cStyleErrorMatch[1]);
497 if (cstyleFile.exists())
498 {
499 int lineNum = safe_convert::stringTo<int>(cStyleErrorMatch[2], -1);
500 pLogEntries->push_back(LogEntry(logFilePath,
501 gsl::narrow_cast<int>(calculateWrappedLine(
502 linesUnwrapped,
503 logLineNum)),
504 LogEntry::Error,
505 cstyleFile,
506 lineNum,
507 cStyleErrorMatch[3]));
508 }
509 }
510 }
511
512 return Success();
513 }
514
parseBibtexLog(const FilePath & logFilePath,LogEntries * pLogEntries)515 Error parseBibtexLog(const FilePath& logFilePath, LogEntries* pLogEntries)
516 {
517 boost::regex re("^(.*)---line ([0-9]+) of file (.*)$");
518
519 // get the lines
520 std::vector<std::string> lines;
521 Error error = core::readStringVectorFromFile(logFilePath, &lines, false);
522 if (error)
523 return error;
524
525 // look for error messages
526 for (std::vector<std::string>::const_iterator it = lines.begin();
527 it != lines.end();
528 it++)
529 {
530 boost::smatch match;
531 if (regex_utils::match(*it, match, re))
532 {
533 pLogEntries->push_back(
534 LogEntry(
535 logFilePath,
536 gsl::narrow_cast<int>((it - lines.begin()) + 1),
537 LogEntry::Error,
538 texFilePath(match[3], logFilePath.getParent()),
539 boost::lexical_cast<int>(match[2]),
540 match[1]));
541 }
542 }
543
544 return Success();
545 }
546
547 } // namespace tex
548 } // namespace core
549 } // namespace rstudio
550
551
552
553