1 /**********************************************************************************
2 *   Copyright (C) 2003 by Jeroen Wijnhout (Jeroen.Wijnhout@kdemail.net)           *
3 *                 2005-2007 by Holger Danielsson (holger.danielsson@versanet.de)  *
4 *                 2006-2019 by Michel Ludwig (michel.ludwig@kdemail.net)          *
5 ***********************************************************************************/
6 
7 /***************************************************************************
8  *                                                                         *
9  *   This program is free software; you can redistribute it and/or modify  *
10  *   it under the terms of the GNU General Public License as published by  *
11  *   the Free Software Foundation; either version 2 of the License, or     *
12  *   (at your option) any later version.                                   *
13  *                                                                         *
14  ***************************************************************************/
15 
16 #include "latexparser.h"
17 
18 #include <QFileInfo>
19 #include <QRegExp>
20 
21 #include <KLocalizedString>
22 
23 #include "codecompletion.h"
24 #include "parserthread.h"
25 
26 namespace KileParser {
27 
LaTeXParserInput(const QUrl & url,QStringList textLines,KileDocument::Extensions * extensions,const QMap<QString,KileStructData> & dictStructLevel,bool showSectioningLabels,bool showStructureTodo)28 LaTeXParserInput::LaTeXParserInput(const QUrl &url, QStringList textLines,
29                                    KileDocument::Extensions *extensions,
30                                    const QMap<QString, KileStructData>& dictStructLevel,
31                                    bool showSectioningLabels,
32                                    bool showStructureTodo)
33     : ParserInput(url),
34       textLines(textLines),
35       extensions(extensions),
36       // make a copy here as otherwise the parsing of a document that is being closed
37       // can lead to a crash:
38       dictStructLevel(dictStructLevel),
39       showSectioningLabels(showSectioningLabels),
40       showStructureTodo(showStructureTodo)
41 {
42 }
43 
LaTeXParserOutput()44 LaTeXParserOutput::LaTeXParserOutput()
45     : bIsRoot(false)
46 {
47 }
48 
~LaTeXParserOutput()49 LaTeXParserOutput::~LaTeXParserOutput()
50 {
51     qCDebug(LOG_KILE_PARSER);
52 }
53 
LaTeXParser(ParserThread * parserThread,LaTeXParserInput * input,QObject * parent)54 LaTeXParser::LaTeXParser(ParserThread *parserThread, LaTeXParserInput *input,
55                          QObject *parent)
56     : Parser(parserThread, parent),
57       m_extensions(input->extensions),
58       m_textLines(input->textLines),
59       m_dictStructLevel(input->dictStructLevel),
60       m_showSectioningLabels(input->showSectioningLabels),
61       m_showStructureTodo(input->showStructureTodo)
62 {
63 }
64 
~LaTeXParser()65 LaTeXParser::~LaTeXParser()
66 {
67     qCDebug(LOG_KILE_PARSER);
68 }
69 
matchBracket(const QStringList & textLines,int & l,int & pos)70 BracketResult LaTeXParser::matchBracket(const QStringList& textLines, int &l, int &pos)
71 {
72     BracketResult result;
73     TodoResult todo;
74 
75     if((getTextLine(textLines, l))[pos] == '[') {
76         result.option = Parser::matchBracket(textLines, '[', l, pos);
77         int p = 0;
78         while(l < textLines.size()) {
79             if((p = processTextline(getTextLine(textLines, l), todo).indexOf('{', pos)) != -1) {
80                 pos = p;
81                 break;
82             }
83             else {
84                 pos = 0;
85                 ++l;
86             }
87         }
88     }
89 
90     if((getTextLine(textLines, l))[pos] == '{') {
91         result.line = l;
92         result.col = pos;
93         result.value = Parser::matchBracket(textLines, '{', l, pos);
94     }
95 
96     return result;
97 }
98 
99 //FIXME: this has to be completely rewritten!
parse()100 ParserOutput* LaTeXParser::parse()
101 {
102     LaTeXParserOutput *parserOutput = new LaTeXParserOutput();
103 
104     qCDebug(LOG_KILE_PARSER) << m_textLines;
105 
106     QMap<QString,KileStructData>::const_iterator it;
107     static QRegExp reCommand("(\\\\[a-zA-Z]+)\\s*\\*?\\s*(\\{|\\[)");
108     static QRegExp reRoot("\\\\documentclass|\\\\documentstyle");
109     static QRegExp reBD("\\\\begin\\s*\\{\\s*document\\s*\\}");
110     static QRegExp reReNewCommand("\\\\renewcommand.*$");
111     static QRegExp reNumOfParams("\\s*\\[([1-9]+)\\]");
112     static QRegExp reNumOfOptParams("\\s*\\[([1-9]+)\\]\\s*\\[([^\\{]*)\\]"); // the quantifier * isn't used by mistake, because also emtpy optional brackets are correct.
113 
114     int tagStart, bd = 0;
115     int tagEnd, tagLine = 0, tagCol = 0;
116     int tagStartLine = 0, tagStartCol = 0;
117     BracketResult result;
118     QString m, s, shorthand;
119     bool foundBD = false; // found \begin { document }
120     bool fire = true; //whether or not we should emit a foundItem signal
121     bool fireSuspended; // found an item, but it should not be fired (this time)
122     TodoResult todo;
123 
124 // 	emit(parsingStarted(m_doc->lines()));
125     for(int i = 0; i < m_textLines.size(); ++i) {
126         if(!m_parserThread->shouldContinueDocumentParsing()) {
127             qCDebug(LOG_KILE_PARSER) << "stopping...";
128             delete(parserOutput);
129             return Q_NULLPTR;
130         }
131 
132 //		emit(parsingUpdate(i));
133 
134         tagStart = tagEnd = 0;
135         fire = true;
136         s = processTextline(getTextLine(m_textLines, i), todo);
137         if(todo.type != -1 && m_showStructureTodo) {
138             QString folder = (todo.type == KileStruct::ToDo) ? "todo" : "fixme";
139             parserOutput->structureViewItems.push_back(new StructureViewItem(todo.comment, i+1, todo.colComment, todo.type, KileStruct::Object, i+1, todo.colTag, QString(), folder));
140         }
141 
142 
143         if(s.isEmpty()) {
144             continue;
145         }
146 
147         //ignore renewcommands
148         s.remove(reReNewCommand);
149 
150         //find all commands in this line
151         while(tagStart != -1) {
152             if((!foundBD) && ((bd = s.indexOf(reBD, tagEnd)) != -1)) {
153                 qCDebug(LOG_KILE_PARSER) << "\tfound \\begin{document}";
154                 foundBD = true;
155                 parserOutput->preamble.clear();
156 //FIXME: improve this
157                 if(bd == 0) {
158                     if(i - 1 >= 0) {
159                         for(int j = 0; j <= i - 1; ++j) {
160                             parserOutput->preamble += getTextLine(m_textLines, j) + '\n';
161                         }
162                     }
163                 }
164                 else {
165                     if(i - 1 >= 0) {
166                         for(int j = 0; j <= i - 1; ++j) {
167                             parserOutput->preamble += getTextLine(m_textLines, j) + '\n';
168                         }
169                     }
170                     parserOutput->preamble += getTextLine(m_textLines, i).left(bd) + '\n';
171                 }
172             }
173 
174             if((!foundBD) && (s.indexOf(reRoot, tagEnd) != -1)) {
175                 qCDebug(LOG_KILE_PARSER) << "\tsetting m_bIsRoot to true";
176                 tagEnd += reRoot.cap(0).length();
177                 parserOutput->bIsRoot = true;
178             }
179 
180             tagStart = reCommand.indexIn(s, tagEnd);
181             m.clear();
182             shorthand.clear();
183 
184             if(tagStart != -1) {
185                 tagEnd = tagStart + reCommand.cap(0).length()-1;
186 
187                 //look up the command in the dictionary
188                 it = m_dictStructLevel.constFind(reCommand.cap(1));
189 
190                 //if it is was a structure element, find the title (or label)
191                 if(it != m_dictStructLevel.constEnd()) {
192                     tagLine = i+1;
193                     tagCol = tagEnd+1;
194                     tagStartLine = tagLine;
195                     tagStartCol = tagStart+1;
196 
197                     if(reCommand.cap(1) != "\\frame") {
198                         result = matchBracket(m_textLines, i, tagEnd);
199                         m = result.value.trimmed();
200                         shorthand = result.option.trimmed();
201                         if(i >= tagLine) { //matching brackets spanned multiple lines
202                             s = getTextLine(m_textLines, i);
203                         }
204                         if(result.line > 0 || result.col > 0) {
205                             tagLine = result.line + 1;
206                             tagCol = result.col + 1;
207                         }
208                         //qCDebug(LOG_KILE_PARSER) << "\tgrabbed: " << reCommand.cap(1) << "[" << shorthand << "]{" << m << "}";
209                     }
210                     else {
211                         m = i18n("Frame");
212                     }
213                 }
214 
215                 //title (or label) found, add the element to the listview
216                 if(!m.isNull()) {
217                     // no problems so far ...
218                     fireSuspended = false;
219 
220                     // remove trailing ./
221                     if((*it).type & (KileStruct::Input | KileStruct::Graphics)) {
222                         if(m.left(2) == "./") {
223                             m = m.mid(2, m.length() - 2);
224                         }
225                     }
226                     // update parameter for environments, because only
227                     // floating environments and beamer frames are passed
228                     if ( (*it).type == KileStruct::BeginEnv )
229                     {
230                         if ( m=="figure" || m=="figure*" || m=="table" || m=="table*" )
231                         {
232                             it = m_dictStructLevel.constFind("\\begin{" + m +'}');
233                         }
234                         else if(m == "asy") {
235                             it = m_dictStructLevel.constFind("\\begin{" + m +'}');
236                             parserOutput->asyFigures.append(m);
237                         }
238                         else if(m == "frame") {
239                             const QString untitledFrameDisplayName = i18n("Frame");
240                             it = m_dictStructLevel.constFind("\\begin{frame}");
241                             if(tagEnd+1 < s.size() && s.at(tagEnd+1) == '{') {
242                                 tagEnd++;
243                                 result = matchBracket(m_textLines, i, tagEnd);
244                                 m = result.value.trimmed();
245                                 if(m.isEmpty()) {
246                                     m = untitledFrameDisplayName;
247                                 }
248                             }
249                             else {
250                                 m = untitledFrameDisplayName;
251                             }
252                         }
253                         else if(m=="block" || m=="exampleblock" || m=="alertblock") {
254                             const QString untitledBlockDisplayName = i18n("Untitled Block");
255                             it = m_dictStructLevel.constFind("\\begin{block}");
256                             if(tagEnd+1 < s.size() && s.at(tagEnd+1) == '{') {
257                                 tagEnd++;
258                                 result = matchBracket(m_textLines, i, tagEnd);
259                                 m = result.value.trimmed();
260                                 if(m.isEmpty()) {
261                                     m = untitledBlockDisplayName;
262                                 }
263                             }
264                             else {
265                                 m = untitledBlockDisplayName;
266                             }
267                         }
268                         else {
269                             fireSuspended = true;    // only floats and beamer frames, no other environments
270                         }
271                     }
272 
273                     // tell structure view that a floating environment or a beamer frame must be closed
274                     else if ( (*it).type == KileStruct::EndEnv )
275                     {
276                         if ( m=="figure" || m== "figure*" || m=="table" || m=="table*" || m=="asy")
277                         {
278                             it = m_dictStructLevel.constFind("\\end{float}");
279                         }
280                         else if(m == "frame") {
281                             it = m_dictStructLevel.constFind("\\end{frame}");
282                         }
283                         else {
284                             fireSuspended = true;          // only floats, no other environments
285                         }
286                     }
287                     // sectioning commands
288                     else if((*it).type == KileStruct::Sect) {
289                         if(!shorthand.isEmpty()) {
290                             m = shorthand;
291                         }
292                     }
293 
294                     // update the label list
295                     else if((*it).type == KileStruct::Label) {
296                         parserOutput->labels.append(m);
297                         // label entry as child of sectioning
298                         if(m_showSectioningLabels) {
299                             parserOutput->structureViewItems.push_back(new StructureViewItem(m, tagLine, tagCol, KileStruct::Label, KileStruct::Object, tagStartLine, tagStartCol, "label", "root"));
300                             fireSuspended = true;
301                         }
302                     }
303 
304                     // update the references list
305                     else if((*it).type == KileStruct::Reference) {
306                         // m_references.append(m);
307                         //fireSuspended = true;          // don't emit references
308                     }
309 
310                     // update the dependencies
311                     else if((*it).type == KileStruct::Input) {
312                         // \input- or \include-commands can be used without extension. So we check
313                         // if an extension exists. If not the default extension is added
314                         // ( LaTeX reference says that this is '.tex'). This assures that
315                         // all files, which are listed in the structure view, have an extension.
316                         QString ext = QFileInfo(m).completeSuffix();
317                         if(ext.isEmpty()) {
318                             m += m_extensions->latexDocumentDefault();
319                         }
320                         parserOutput->deps.append(m);
321                     }
322 
323                     // update the referenced Bib files
324                     else  if((*it).type == KileStruct::Bibliography) {
325                         qCDebug(LOG_KILE_PARSER) << "===TeXInfo::updateStruct()===appending Bibiliograph file(s) " << m;
326 
327                         const QStringList bibs = m.split(',');
328                         QString biblio;
329 
330                         // assure that all files have an extension
331                         const QString bibtexExtension = m_extensions->bibtexDefault();
332                         for(QString biblio : bibs) {
333                             biblio = biblio.trimmed();
334                             {
335                                 QString ext = QFileInfo(biblio).suffix();
336                                 if(ext.isEmpty()) {
337                                     biblio += m_extensions->bibtexDefault();
338                                 }
339                             }
340                             parserOutput->bibliography.append(biblio);
341                             if(biblio.left(2) == "./") {
342                                 biblio = biblio.mid(2, biblio.length() - 2);
343                             }
344                             parserOutput->deps.append(biblio);
345                             parserOutput->structureViewItems.push_back(new StructureViewItem(biblio, tagLine, tagCol, (*it).type, (*it).level, tagStartLine, tagStartCol, (*it).pix, (*it).folder));
346                         }
347                         fire = false;
348                     }
349 
350                     // update the bibitem list
351                     else if((*it).type == KileStruct::BibItem) {
352                         //qCDebug(LOG_KILE_PARSER) << "\tappending bibitem " << m;
353                         parserOutput->bibItems.append(m);
354                     }
355 
356                     // update the package list
357                     else if((*it).type == KileStruct::Package) {
358                         QStringList pckgs = m.split(',');
359                         uint cumlen = 0;
360                         for(int p = 0; p < pckgs.count(); ++p) {
361                             QString package = pckgs[p].trimmed();
362                             if(!package.isEmpty()) {
363                                 parserOutput->packages.append(package);
364                                 // hidden, so emit is useless
365                                 // emit( foundItem(package, tagLine, tagCol+cumlen, (*it).type, (*it).level, tagStartLine, tagStartCol, (*it).pix, (*it).folder) );
366                                 cumlen += package.length() + 1;
367                             }
368                         }
369                         fire = false;
370                     }
371 
372                     // newcommand found, add it to the newCommands list
373                     else if((*it).type & (KileStruct::NewCommand | KileStruct::NewEnvironment)) {
374                         QString optArg, mandArgs;
375 
376                         //find how many parameters this command takes
377                         if(s.indexOf(reNumOfParams, tagEnd + 1) != -1) {
378                             bool ok;
379                             int noo = reNumOfParams.cap(1).toInt(&ok);
380 
381                             if(ok) {
382                                 if(s.indexOf(reNumOfOptParams, tagEnd + 1) != -1) {
383                                     qCDebug(LOG_KILE_PARSER) << "Opt param is " << reNumOfOptParams.cap(2) << "%EOL";
384                                     noo--; // if we have an opt argument, we have one mandatory argument less, and noo=0 can't occur because then latex complains (and we don't macht them with reNumOfParams either)
385                                     optArg = '[' + reNumOfOptParams.cap(2) + ']';
386                                 }
387 
388                                 for(int noo_index = 0; noo_index < noo; ++noo_index) {
389                                     mandArgs +=  '{' + s_bullet + '}';
390                                 }
391 
392                             }
393                             if(!optArg.isEmpty()) {
394                                 if((*it).type == KileStruct::NewEnvironment) {
395                                     parserOutput->newCommands.append(QString("\\begin{%1}%2%3").arg(m).arg(optArg).arg(mandArgs));
396                                 }
397                                 else {
398                                     parserOutput->newCommands.append(m + optArg + mandArgs);
399                                 }
400                             }
401                         }
402                         if((*it).type == KileStruct::NewEnvironment) {
403                             parserOutput->newCommands.append(QString("\\begin{%1}%3").arg(m).arg(mandArgs));
404                             parserOutput->newCommands.append(QString("\\end{%1}").arg(m));
405                         }
406                         else {
407                             parserOutput->newCommands.append(m + mandArgs);
408                         }
409                         //FIXME  set tagEnd to the end of the command definition
410                         break;
411                     }
412                     // and some other commands, which don't need special actions:
413                     // \caption, ...
414 
415                     // qCDebug(LOG_KILE_PARSER) << "\t\temitting: " << m;
416                     if(fire && !fireSuspended) {
417                         parserOutput->structureViewItems.push_back(new StructureViewItem(m, tagLine, tagCol, (*it).type, (*it).level, tagStartLine, tagStartCol, (*it).pix, (*it).folder));
418                     }
419                 } //if m
420             } // if tagStart
421         } // while tagStart
422     } //for
423 
424     qCDebug(LOG_KILE_PARSER) << "done";
425     return parserOutput;
426 }
427 
428 
429 }
430 
431