1 #include "smallUsefulFunctions.h"
2 #include "qdocumentline_p.h"
3 #include "qdocument.h"
4 #include <QBuffer>
5 #include "latexparser/latexparser.h"
6 
7 using std::map;
8 using std::pair;
9 
10 using CharMap = map<pair<char,char>,int>;
11 
12 /*
13 QList<QPair<QString,QString> > latexToPlainWordReplaceList =
14  QList<QPair<QString,QString> >()
15  << QPair<QString, QString> ("\\-","") //Trennung [separation] (german-babel-package also: \")
16  << QPair<QString, QString> ("\\/","") //ligatur preventing (german-package also: "|)
17  << QPair<QString, QString> ("\"~","-") //- ohne Trennung (without separation)
18 //german-babel-package: "- (\- but also normal break),  "= ( like - but also normal break), "" (umbruch ohne bindestrich)
19  << QPair<QString, QString> ("\"-","")
20  << QPair<QString, QString> ("\"a","\xE4")
21  << QPair<QString, QString> ("\"o","\xF6")
22  << QPair<QString, QString> ("\"u","\xFC")
23  << QPair<QString, QString> ("\"s","\xDF")
24  << QPair<QString, QString> ("\"A","\xC4")
25  << QPair<QString, QString> ("\"O","\xD6")
26  << QPair<QString, QString> ("\"U","\xDC")
27  << QPair<QString, QString> ("\\\"{a}","\xE4")
28  << QPair<QString, QString> ("\\\"{o}","\xF6")
29  << QPair<QString, QString> ("\\\"{u}","\xFC")
30  << QPair<QString, QString> ("\\\"{A}","\xC4")
31  << QPair<QString, QString> ("\\\"{O}","\xD6")
32  << QPair<QString, QString> ("\\\"{U}","\xDC")
33  << QPair<QString, QString> ("\"|","")
34  << QPair<QString, QString> ("\"","")
35 //	<< QPair<QString, QString> ("\"\"","") redunant
36  << QPair<QString, QString> ("\\",""); // eliminating backslash which might remain from accents like \"a ...
37 */
38 
/*!
 * Lookup table for LaTeX accent macros.
 * Key:   { accent command character, base letter }
 * Value: code point of the precomposed character (all values are below 0x100).
 */
const std::map<std::pair<char, char>, int> characters {

	// umlaut / diaeresis
	{ { '"', 'a' }, 0xE4 }, { { '"', 'e' }, 0xEB }, { { '"', 'i' }, 0xEF },
	{ { '"', 'o' }, 0xF6 }, { { '"', 'u' }, 0xFC },
	{ { '"', 'A' }, 0xC4 }, { { '"', 'E' }, 0xCB }, { { '"', 'I' }, 0xCF },
	{ { '"', 'O' }, 0xD6 }, { { '"', 'U' }, 0xDC },
	{ { '"', 's' }, 0xDF },

	// grave
	{ { '`', 'a' }, 0xE0 }, { { '`', 'e' }, 0xE8 }, { { '`', 'i' }, 0xEC },
	{ { '`', 'o' }, 0xF2 }, { { '`', 'u' }, 0xF9 },
	{ { '`', 'A' }, 0xC0 }, { { '`', 'E' }, 0xC8 }, { { '`', 'I' }, 0xCC },
	{ { '`', 'O' }, 0xD2 }, { { '`', 'U' }, 0xD9 },

	// acute
	{ { '\'', 'a' }, 0xE1 }, { { '\'', 'e' }, 0xE9 }, { { '\'', 'i' }, 0xED },
	{ { '\'', 'o' }, 0xF3 }, { { '\'', 'u' }, 0xFA }, { { '\'', 'y' }, 0xFD },
	{ { '\'', 'A' }, 0xC1 }, { { '\'', 'E' }, 0xC9 }, { { '\'', 'I' }, 0xCD },
	{ { '\'', 'O' }, 0xD3 }, { { '\'', 'U' }, 0xDA }, { { '\'', 'Y' }, 0xDD },

	// circumflex
	{ { '^', 'a' }, 0xE2 }, { { '^', 'e' }, 0xEA }, { { '^', 'i' }, 0xEE },
	{ { '^', 'o' }, 0xF4 }, { { '^', 'u' }, 0xFB },
	{ { '^', 'A' }, 0xC2 }, { { '^', 'E' }, 0xCA }, { { '^', 'I' }, 0xCE },
	{ { '^', 'O' }, 0xD4 }, { { '^', 'U' }, 0xDB },

	// tilde
	{ { '~', 'a' }, 0xE3 }, { { '~', 'n' }, 0xF1 }, { { '~', 'o' }, 0xF5 },
	{ { '~', 'A' }, 0xC3 }, { { '~', 'N' }, 0xD1 }, { { '~', 'O' }, 0xD5 },

	// cedilla
	{ { 'c', 'c' }, 0xE7 }, { { 'c', 'C' }, 0xC7 }
};
110 
111 /*!
112  * \brief transformCharacter
113  * Transform a character from a tex encoded to utf
114  * e.g. "a -> ä
115  * \param c
116  * \param context
117  * \return tranformed character
118  */
transformCharacter(const QChar & character,const QChar & context)119 QChar transformCharacter(const QChar & character,const QChar & context){
120 
121     auto transformation = characters.find({ context.toLatin1() , character.toLatin1() });
122 
123     if(transformation == characters.end())
124         return character;
125 
126     return QChar(transformation -> second);
127 }
128 
/*!
 * \brief convert a LaTeX-encoded word into plain text
 *
 * Sequences introduced by a backslash are decoded as follows:
 *  - \- and \/ (discretionary hyphen, ligature break) are dropped
 *  - an accent macro (\" \' \^ \` \~ \c) followed by a character, either
 *    braced (\"{a}) or bare (\"a), is replaced via transformCharacter()
 *  - \c directly followed by an ASCII letter is a control word such as \cite,
 *    not a cedilla; only the backslash is dropped
 *  - for every other sequence only the backslash is dropped
 *  - a backslash at the very end of the word is dropped
 * Shorthands starting with a plain double quote (german/babel) are not
 * decoded here.
 * \param word LaTeX source of a single word
 * \return the decoded word
 */
QString latexToPlainWord(const QString &word)
{
	auto isAsciiLetter = [](const QChar &ch) {
		const int u = ch.unicode();
		return (u >= 'a' && u <= 'z') || (u >= 'A' && u <= 'Z');
	};

	const int length = word.length();
	QString result;
	result.reserve(length);
	for (int i = 0; i < length; i++) {
		if (word[i] != '\\') {
			result.append(word[i]);
			continue;
		}
		// decode the sequence introduced by the backslash
		i++;
		if (i >= length) break;
		const QChar accent = word[i];
		switch (accent.toLatin1()) {
		case '-': // discretionary hyphen
		case '/': // ligature prevention
			break;

		case '"':
		case '\'':
		case '^':
		case '`':
		case '~':
		case 'c':
			if (accent == 'c' && i + 1 < length && isAsciiLetter(word[i + 1])) {
				// control word starting with 'c': keep its letters, drop only the backslash
				i--;
				break;
			}
			if (i + 3 < length && word[i + 1] == '{' && word[i + 3] == '}') {
				// braced form, e.g. \"{a}
				result.append(transformCharacter(word[i + 2], accent));
				i += 3;
				break;
			}
			if (i + 1 < length) {
				if (word[i + 1] == '\\' || word[i + 1] == '"')
					break;  // accent without a base letter: drop it
				// bare form, e.g. \"a
				result.append(transformCharacter(word[i + 1], accent));
				i++;
				break;
			}
			i--; // accent char is the last char: emit it literally on the next pass
			break;
		default:
			i--; // unknown sequence: emit the current char literally on the next pass
		}
	}

	return result;
}
/*!
 * \brief convert a LaTeX word to plain text and apply a replacement table
 *
 * The word is first decoded with latexToPlainWord(). If \a replacementList is
 * not empty, the decoded text is scanned from left to right: at each position
 * the first key (in map order) that is a prefix of the remaining text is
 * substituted by its value, otherwise one character is copied unchanged.
 * Empty keys are ignored, since they would match without consuming input.
 * Finally one leading and one trailing double quote are removed.
 * \param word LaTeX source of the word
 * \param replacementList maps text fragments to their replacement; it is only read
 * \return converted word
 */
QString latexToPlainWordwithReplacementList(const QString &word, QMap<QString, QString> &replacementList )
{
	QString remaining = latexToPlainWord(word);
	QString result;
	if (replacementList.isEmpty()) {
		result = remaining;
	} else {
		while (!remaining.isEmpty()) {
			// default: copy a single character
			QString piece = remaining.left(1);
			int consumed = 1;
			for (auto it = replacementList.constBegin(); it != replacementList.constEnd(); ++it) {
				// an empty key is a prefix of everything and would loop forever
				if (it.key().isEmpty() || !remaining.startsWith(it.key())) continue;
				piece = it.value();
				consumed = it.key().length();
				break;
			}
			result.append(piece);
			remaining = remaining.mid(consumed);
		}
	}
	// remove leading and trailing "
	if (result.startsWith("\"")) {
		result = result.mid(1);
	}
	if (result.endsWith("\"")) {
		result.chop(1);
	}
	return result;
}
225 
/*!
 * Escapes plain text so it can be inserted into LaTeX source.
 * Reserved characters are replaced according to
 * http://en.wikibooks.org/wiki/LaTeX/Basics#Reserved_Characters
 * and pairs of straight double quotes become ``...''.
 */
QString textToLatex(const QString &text)
{
	QString escaped = text;

	// Braces go first. Afterwards every backslash that is not part of a freshly
	// created \{ or \} is a literal backslash from the input.
	escaped.replace("{", "\\{");
	escaped.replace("}", "\\}");
	escaped.replace(QRegularExpression("\\\\(?![{}])"),"\\textbackslash{}");

	// remaining reserved characters: plain one-to-one substitutions
	static const char *const substitutions[][2] = {
		{ "#", "\\#" },
		{ "$", "\\$" },
		{ "%", "\\%" },
		{ "&", "\\&" },
		{ "~", "\\~{}" },
		{ "_", "\\_" },
		{ "^", "\\^{}" },
	};
	for (const auto &entry : substitutions)
		escaped.replace(QString(entry[0]), QString(entry[1]));

	// "quoted" -> ``quoted''
	escaped.replace(QRegularExpression("\"(.*?)\""), "``\\1''");

	return escaped;
}
251 
252 
startOfArg(const QString & s,int index)253 int startOfArg(const QString &s, int index) {
254 	for (int i=index; i < s.length(); i++) {
255 		if (s.at(i).isSpace()) continue;
256 		if (s.at(i) == '{') return i;
257 		return -1;
258 	}
259 	return -1;
260 }
261 
262 /*!
263  * Parses a Latex string to a plain string.
264  * Specifically, this substitues \texorpdfstring and removes explicit hyphens.
265  */
latexToText(QString s)266 QString latexToText(QString s)
267 {
268 	// substitute \texorpdfstring
269 	int start, stop;
270 	int texorpdfstringLength = 15;
271 	start = s.indexOf("\\texorpdfstring");
272 	while (start >= 0 && start < s.length()) {
273 
274 		// first arg
275 		int i = startOfArg(s, start + texorpdfstringLength);
276 		if (i < 0) {  // no arguments for \\texorpdfstring
277 			start += texorpdfstringLength;
278 			start = s.indexOf("\\texorpdfstring", start);
279 			continue;
280 		}
281 		i++;
282 		stop = findClosingBracket(s, i);
283 		if (stop < 0) {  // missing closing bracket for first argument of \\texorpdfstring
284 			start += texorpdfstringLength;
285 			start = s.indexOf("\\texorpdfstring", start);
286 			continue;
287 		}
288 
289 		// second arg
290 		i = startOfArg(s, stop + 1);
291 		if (i < 0) {  // no second arg for \\texorpdfstring
292 			start += texorpdfstringLength;
293 			start = s.indexOf("\\texorpdfstring", start);
294 			continue;
295 		}
296 		i++;
297 		stop = findClosingBracket(s, i);
298 		if (stop < 0) {
299 			start += texorpdfstringLength;
300 			start = s.indexOf("\\texorpdfstring", start);
301 			continue;  // no second arg for \\texorpdfstring
302 		}
303 		s.remove(stop, 1);
304 		s.remove(start, i - start);
305 		start = s.indexOf("\\texorpdfstring", start);
306 	}
307 	// remove discretionary  hyphenations
308 	s.remove("\\-");
309 	return s;
310 }
311 
312 // joins all the input lines trimming whitespace. A new line is started on comments and empty lines
joinLinesExceptCommentsAndEmptyLines(const QStringList & lines)313 QStringList joinLinesExceptCommentsAndEmptyLines(const QStringList &lines){
314 
315 	QStringList joinedLines;
316 	QString tmpLine;
317 
318     #define FLUSH_TMPLINE()                      \
319         if(!tmpLine.isEmpty()){             \
320             joinedLines.append(tmpLine);    \
321             tmpLine.clear();                \
322         }
323 
324 	foreach (const QString &l, lines) {
325 		QString rtrimmedLine = trimRight(l);
326 
327 		if (rtrimmedLine.isEmpty()) { // empty line as separator
328             FLUSH_TMPLINE();
329 			joinedLines.append(rtrimmedLine);
330 			continue;
331 		}
332 
333 		if (tmpLine.isEmpty()) {
334 			tmpLine.append(rtrimmedLine);
335 		} else {
336 			tmpLine.append(" " + rtrimmedLine.trimmed());
337 		}
338 		int commentStart = LatexParser::commentStart(rtrimmedLine);
339 		if (commentStart >= 0) {
340             FLUSH_TMPLINE();
341 		}
342 	}
343 
344     FLUSH_TMPLINE();
345 
346     #undef FLUSH_TMPLINE
347 
348 	return joinedLines;
349 }
350 
351 // splits lines after maximal number of chars while keeping track of indentation and comments
splitLines(const QStringList & lines,int maxCharPerLine,const QRegularExpression & breakChars)352 QStringList splitLines(const QStringList &lines, int maxCharPerLine, const QRegularExpression &breakChars)
353 {
354 	QStringList splittedLines;
355 	int maxIndent = maxCharPerLine / 2 * 3;
356 	foreach (QString line, lines) {
357 		int textStart = 0;
358 		while (textStart < line.length() && line.at(textStart).isSpace() && textStart < maxIndent) textStart++;
359 		if (textStart >= line.length()) { // empty line
360 			splittedLines << line;
361 			continue;
362 		}
363 		int maxCharPerLineWithoutIndent = maxCharPerLine - textStart;
364 		QString indent = line.left(textStart);
365 		line = line.mid(textStart);
366 
367 		bool inComment = false;
368 		while (line.length() > maxCharPerLineWithoutIndent) {
369 			if (inComment) line.prepend("% ");
370 			int breakAt = line.lastIndexOf(breakChars, maxCharPerLineWithoutIndent);
371 			if (breakAt <= 3) breakAt = -1;
372 			QString leftPart = line.left(breakAt);
373 			splittedLines << indent + leftPart;
374 			if (breakAt >= 0) {
375 				line.remove(0, breakAt + 1);
376 				inComment = inComment || (LatexParser::commentStart(leftPart) >= 0);
377 			} else {
378 				line.clear();
379 				break;
380 			}
381 		}
382 		if (line.length() > 0) {
383 			if (inComment) line.prepend("% ");
384 			splittedLines << indent + line;
385 		}
386 	}
387 	return splittedLines;
388 }
389 
localeAwareLessThan(const QString & s1,const QString & s2)390 bool localeAwareLessThan(const QString &s1, const QString &s2)
391 {
392 	return QString::localeAwareCompare(s1, s2) < 0;
393 }
394 
395 // removes whitespace from the beginning of the string
trimLeft(const QString & s)396 QString trimLeft(const QString &s)
397 {
398 	int j;
399 	for (j = 0; j < s.length(); j++)
400 		if (s[j] != ' ' && s[j] != '\t' && s[j] != '\r' && s[j] != '\n') break;
401 	return s.mid(j);
402 }
403 
404 // removes whitespace from the end of the string
trimRight(const QString & s)405 QString trimRight(const QString &s)
406 {
407 	if (s.isEmpty()) return QString();
408 	int j;
409 	for (j = s.length() - 1; j >= 0; j--)
410 		if (s[j] != ' ' && s[j] != '\t' && s[j] != '\r' && s[j] != '\n') break;
411 	return s.left(j + 1);
412 }
413 /*!
414  * \brief get argument after command 'token'
415  *
416  * handles latex comments correctly
417  * \warning obsolete with lexer based token system
418  * \param line text of one line
419  * \param token latexcommand
420  * \return text after token
421  */
findToken(const QString & line,const QString & token)422 QString findToken(const QString &line, const QString &token)
423 {
424 	int tagStart = line.indexOf(token);
425     int commentStart = line.indexOf(QRegularExpression("(^|[^\\\\])%")); // find start of comment (if any)
426 	if (tagStart != -1 && (commentStart > tagStart || commentStart == -1)) {
427 		tagStart += token.length();
428 		int tagEnd = line.indexOf("}", tagStart);
429 		if (tagEnd != -1) return line.mid(tagStart, tagEnd - tagStart);
430 		else return line.mid(tagStart); //return everything after line if there is no }
431 	}
432 	return "";
433 }
434 /*!
435  * \brief get argument after command 'token'
436  *
437  * handles latex comments correctly
438  * \warning obsolete with lexer based token system
439  * \param line text of one line
440  * \param token latexcommand
441  * \param start column number
442  * \return text after token
443  */
findToken(const QString & line,const QString & token,int & start)444 QString findToken(const QString &line, const QString &token, int &start)
445 {
446 	int tagStart = line.indexOf(token, start);
447     int commentStart = line.indexOf(QRegularExpression("(^|[^\\\\])%")); // find start of comment (if any)
448 	if (tagStart != -1 && (commentStart > tagStart || commentStart == -1)) {
449 		tagStart += token.length();
450 		int tagEnd = line.indexOf("}", tagStart);
451 		start = tagStart;
452 		if (tagEnd != -1) return line.mid(tagStart, tagEnd - tagStart);
453 		else return line.mid(tagStart); //return everything after line if there is no }
454 	}
455 	start = -2;
456 	return "";
457 }
458 /*!
459  * \brief get argument after command 'token'
460  *
461  * handles latex comments correctly
462  * \warning obsolete with lexer based token system
463  * \param line text of one line
464  * \param token regexp to search
465  * \return text after token
466  */
findToken(const QString & line,QRegExp & token)467 QString findToken(const QString &line, QRegExp &token)
468 {
469 	//ATTENTION: token is not const because, you can't call cap on const qregexp in qt < 4.5
470 	int tagStart = 0;
471 	QString s = line;
472 	tagStart = token.indexIn(line);
473     int commentStart = line.indexOf(QRegularExpression("(^|[^\\\\])%")); // find start of comment (if any)
474 	if (tagStart != -1 && (commentStart > tagStart || commentStart == -1)) {
475 		s = s.mid(tagStart + token.cap(0).length(), s.length());
476 		return s;
477 	}
478 	return "";
479 }
480 
findTokenWithArg(const QString & line,const QString & token,QString & outName,QString & outArg)481 bool findTokenWithArg(const QString &line, const QString &token, QString &outName, QString &outArg)
482 {
483 	outName = "";
484 	outArg = "";
485 	int tagStart = line.indexOf(token);
486     int commentStart = line.indexOf(QRegularExpression("(^|[^\\\\])%")); // find start of comment (if any)
487 	if (tagStart != -1 && (commentStart > tagStart || commentStart == -1)) {
488 		tagStart += token.length();
489 		int tagEnd = line.indexOf("}", tagStart);
490 		if (tagEnd != -1) {
491 			outName = line.mid(tagStart, tagEnd - tagStart);
492 			int curlyOpen = line.indexOf("{", tagEnd);
493 			int optionStart = line.indexOf("[", tagEnd);
494 			if (optionStart < curlyOpen || (curlyOpen == -1 && optionStart != -1)) {
495 				int optionEnd = line.indexOf("]", optionStart);
496 				if (optionEnd != -1) outArg = line.mid(optionStart + 1, optionEnd - optionStart - 1);
497 				else outArg = line.mid(optionStart + 1);
498 			}
499 		} else outName = line.mid(tagStart); //return everything after line if there is no }
500 		return true;
501 	}
502 	return false;
503 }
504 
505 /*! returns the command at pos (including \) in outCmd. pos may be anywhere in the command name (including \) but
506  * not in command options. Return value is the index of the first char after the command (or pos if there was no command
507  * \warning obsolete with lexer-based token system
508  */
509 // TODO: currently does not work for command '\\'
getCommand(const QString & line,QString & outCmd,int pos)510 int getCommand(const QString &line, QString &outCmd, int pos)
511 {
512 	int start = pos;
513 
514 	while (line.at(start) != '\\') { // find beginning
515 		if (!isCommandChar(line.at(start)) || start == 0) return pos; // no command
516 		start--;
517 	}
518 
519 	int i = pos + 1;
520 	for (; i < line.length(); i++)
521 		if (!isCommandChar(line.at(i))) break;
522 	outCmd = line.mid(start, i - start);
523 	return i;
524 }
525 
526 /*! returns command option list. pos has to be at the beginning of the first bracket
527  * posBehind returns the position after the last bracket, you may pass the same variable as in pos
528  * \warning obsolete with lexer-based token system
529  */
getCommandOptions(const QString & line,int pos,int * posBehind)530 QList<CommandArgument> getCommandOptions(const QString &line, int pos, int *posBehind)
531 {
532 	static QMap<QChar, QChar> cbs;
533 	if (cbs.isEmpty()) {
534 		cbs[QChar('{')] = QChar('}');
535 		cbs[QChar('[')] = QChar(']');
536 	}
537 
538 	QList<CommandArgument> options;
539 
540 	int start = pos;
541 	if (posBehind) *posBehind = start;
542 	if (pos >= line.length()) return options;
543 	QChar oc = line[start];
544 	if (!cbs.contains(oc)) return options;
545 
546 	for (int num = 1;; num++) {
547 		int end = findClosingBracket(line, start, oc, cbs[oc]);
548 		if (end < 0) break; // open without close
549 		CommandArgument arg;
550 		arg.isOptional = (oc == '[');
551 		arg.number = num;
552 		arg.value = line.mid(start + 1, end - start - 1);
553 		options.append(arg);
554 		start = end + 1;
555 		if (posBehind) *posBehind = start;
556 		if (start >= line.length() || !cbs.contains(line[start])) break; // close on last char or last option reached
557 		else oc = line[start];
558 	}
559 	return options;
560 }
561 
562 /* returns the item at pos in a colon separated list of options (empty on colon
563  * e.g. getParamItem("{one, two, three}", 7) returns "two"
564  * \warning obsolete with lexer-based token system
565  */
getParamItem(const QString & line,int pos,bool stopAtWhiteSpace)566 QString getParamItem(const QString &line, int pos, bool stopAtWhiteSpace)
567 {
568 	REQUIRE_RET(pos <= line.length(), QString());
569 	int start;
570 	int curlCount = 0;
571 	int squareCount = 0;
572 	QString openDelim(",{[");
573 	if (stopAtWhiteSpace) openDelim += " \t\n\r";
574 	for (start = pos; start > 0; start--) {
575 		QChar c = line.at(start - 1);
576 		if (c == '}' && openDelim.contains('{')) curlCount++;
577 		if (c == '{') {
578 			if (curlCount-- <= 0) break;
579 			else continue;
580 		}
581 		if (c == ']' && openDelim.contains('[')) squareCount++;
582 		if (c == '[') {
583 			if (squareCount-- <= 0) break;
584 			else continue;
585 		}
586 		if (openDelim.contains(c)) break;
587 	}
588 	int end = pos;
589 	QString closeDelim(",]}");
590 	if (stopAtWhiteSpace) closeDelim += " \t\n\r";
591 	curlCount = 0;
592 	squareCount = 0;
593 	for (end = pos; end < line.length(); end++) {
594 		QChar c = line.at(end);
595 		if (c == '{' && closeDelim.contains('}')) curlCount++;
596 		if (c == '}') {
597 			if (curlCount-- <= 0) break;
598 			else continue;
599 		}
600 		if (c == '[' && closeDelim.contains(']')) squareCount++;
601 		if (c == ']') {
602 			if (squareCount-- <= 0) break;
603 			else continue;
604 		}
605 		if (closeDelim.contains(c)) break;
606 	}
607 	return line.mid(start, end - start);
608 }
609 
generateRegExp(const QString & text,const bool isCase,const bool isWord,const bool isRegExp)610 QRegExp generateRegExp(const QString &text, const bool isCase, const bool isWord, const bool isRegExp)
611 {
612 	Qt::CaseSensitivity cs = isCase ? Qt::CaseSensitive : Qt::CaseInsensitive;
613 	QRegExp m_regexp;
614 	if ( isRegExp ) {
615 		m_regexp = QRegExp(text, cs, QRegExp::RegExp);
616 	} else if ( isWord ) {
617 		//todo: screw this? it prevents searching of "world!" and similar things
618 		//(qtextdocument just checks the surrounding character when searching for whole words, this would also allow wholewords|regexp search)
619 		m_regexp = QRegExp(
620 		               QString("\\b%1\\b").arg(QRegExp::escape(text)),
621 		               cs,
622 		               QRegExp::RegExp
623 		           );
624 	} else {
625 		m_regexp = QRegExp(text, cs, QRegExp::FixedString);
626 	}
627 	return m_regexp;
628 }
629 
generateRegularExpression(const QString & text,const bool isCase,const bool isWord,const bool isRegExp)630 QRegularExpression generateRegularExpression(const QString &text, const bool isCase, const bool isWord, const bool isRegExp)
631 {
632     QRegularExpression::PatternOption po = isCase ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption;
633     QRegularExpression m_regexp;
634     if ( isRegExp ) {
635         m_regexp = QRegularExpression(text,  po);
636     } else if ( isWord ) {
637         //todo: screw this? it prevents searching of "world!" and similar things
638         //(qtextdocument just checks the surrounding character when searching for whole words, this would also allow wholewords|regexp search)
639         m_regexp = QRegularExpression(
640                        QString("\\b%1\\b").arg(QRegularExpression::escape(text)),
641                        po
642                    );
643     } else {
644         m_regexp = QRegularExpression(QRegularExpression::escape(text), po);
645     }
646     return m_regexp;
647 }
648 
regExpFindAllMatches(const QString & searchIn,const QRegExp & regexp,int cap)649 QStringList regExpFindAllMatches(const QString &searchIn, const QRegExp &regexp, int cap)
650 {
651 	int offset = regexp.indexIn(searchIn);
652 	QStringList res;
653 	while (offset > -1) {
654 		res << regexp.cap(cap);
655 		offset = regexp.indexIn(searchIn, offset + regexp.matchedLength());
656 	}
657 	return res;
658 }
659 
660 
regularExpressionFindAllMatches(const QString & searchIn,const QRegularExpression & regexp,int cap)661 QStringList regularExpressionFindAllMatches(const QString &searchIn, const QRegularExpression &regexp, int cap)
662 {
663     QRegularExpressionMatch match = regexp.match(searchIn);
664     int offset=match.capturedStart();
665     QStringList res;
666     while (offset > -1) {
667         res << match.captured(cap);
668         match = regexp.match(searchIn,offset+match.capturedLength());
669         offset = match.capturedStart();
670     }
671     return res;
672 }
673 /*!
674  * a multi-match equivalent of QString::indexOf(QString)
675  */
indicesOf(const QString & line,const QString & word,Qt::CaseSensitivity cs)676 QList<int> indicesOf(const QString &line, const QString &word, Qt::CaseSensitivity cs)
677 {
678 	QList<int> columns;
679 	int col = 0;
680 	while (col < line.length() - 1) {
681 		col = line.indexOf(word, col, cs);
682 		if (col < 0) break;
683 		columns.append(col);
684 		col++;
685 	}
686 	return columns;
687 }
688 
689 /*!
690  * a multi-match equivalent of QString::indexOf(QRegExp)
691  */
indicesOf(const QString & line,const QRegularExpression & rx)692 QList<int> indicesOf(const QString &line, const QRegularExpression &rx)
693 {
694 	QList<int> columns;
695 	int col = 0;
696 	// exact match
697 	while (col < line.length() - 1) {
698 		col = line.indexOf(rx, col);
699 		if (col < 0) break;
700 		columns.append(col);
701 		col++;
702 	}
703 	return columns;
704 }
705 
addEnvironmentToDom(QDomDocument & doc,const QString & EnvironName,const QString & EnvironMode,bool completeParentheses)706 void addEnvironmentToDom(QDomDocument &doc, const QString &EnvironName, const QString &EnvironMode, bool completeParentheses)
707 {
708 	QDomElement root = doc.documentElement();
709 	QDomElement tag = doc.createElement("context");
710 	tag.setAttribute("id", EnvironMode == "numbers" ? "mathMyEnv" : "myVerb");
711 	tag.setAttribute("format", EnvironMode);
712 	if (EnvironMode != "comment") tag.setAttribute("transparency", "true");
713 	QDomElement child1 = doc.createElement("start");
714 	child1.setAttribute("parenthesis", QString("my%1:open%2").arg(EnvironName).arg(completeParentheses ? "" : "@nocomplete"));
715 	child1.setAttribute("fold", "true");
716 	child1.setAttribute("format", "extra-keyword");
717 	child1.setAttribute("parenthesisWeight", "30");
718 	QDomText dtxt = doc.createTextNode(QString("\\\\begin{%1}").arg(EnvironName));
719 	child1.appendChild(dtxt);
720 	QDomElement child2 = doc.createElement("stop");
721 	child2.setAttribute("parenthesis", QString("my%1:close%2").arg(EnvironName).arg(completeParentheses ? "" : "@nocomplete"));
722 	child2.setAttribute("fold", "true");
723 	child2.setAttribute("format", "extra-keyword");
724 	child2.setAttribute("parenthesisWeight", "30");
725 	QDomText dtxt2 = doc.createTextNode(QString("\\\\end{%1}").arg(EnvironName));
726 	child2.appendChild(dtxt2);
727 
728 	tag.appendChild(child1);
729 	tag.appendChild(child2);
730 
731 	if (EnvironMode == "numbers") {
732 		QDomElement child3 = doc.createElement("word");
733 		child3.setAttribute("id", "keywords/single");
734 		child3.setAttribute("format", "math-keyword");
735 		child3.appendChild(doc.createTextNode("\\\\[a-zA-Z]+"));
736 		tag.appendChild(child3);
737 	}
738 
739 	//insert before the first context with the same format, so that transparency is actually used
740 	QDomNode insertAt;
741 	for (int i = 0; i < root.childNodes().size(); i++)
742 		if (root.childNodes().item(i).attributes().namedItem("format").nodeValue() == EnvironMode) {
743 			insertAt = root.childNodes().item(i);
744 			break;
745 		}
746 	root.insertBefore(tag, insertAt);
747 }
748 
749 /*! adds entries for structure commands to the Dom of a QNFA file
750  * commands are taken from possibleCommands["%structure0"] to possibleCommands["%structureN"]
751  */
addStructureCommandsToDom(QDomDocument & doc,const QHash<QString,QSet<QString>> & possibleCommands)752 void addStructureCommandsToDom(QDomDocument &doc , const QHash<QString, QSet<QString> > &possibleCommands)
753 {
754 	QDomElement root = doc.documentElement();
755 
756 	QDomNode parent;
757 	for (int i = root.childNodes().size() - 1; i >= 0; i--) {
758 		if (root.childNodes().item(i).attributes().namedItem("id").nodeValue() == "keywords/structure") {
759 			parent = root.childNodes().item(i);
760 			break;
761 		}
762 	}
763 	if (parent.isNull()) {
764 		return;
765 	}
766 
767 	while (!parent.firstChild().isNull()) {
768 		parent.removeChild(parent.firstChild());
769 	}
770 
771 	for (int level = 0; level <= LatexParser::MAX_STRUCTURE_LEVEL; level++) {
772 		foreach (const QString &cmd, possibleCommands[QString("%structure%1").arg(level)]) {
773 			QDomElement child = doc.createElement("word");
774 			QString name = cmd;
775 			name.remove('\\');
776 			child.setAttribute("parenthesis", QString("structure%1:boundary@nomatch").arg(level));
777 			child.setAttribute("parenthesisWeight", QString("%1").arg(8 - level));
778 			child.setAttribute("fold", "true");
779 			name = cmd;
780 			name.replace('\\', "\\\\");  // words are regexps, so we have to escape the slash
781 			QDomText dtxt = doc.createTextNode(name);
782 			child.appendChild(dtxt);
783 			parent.appendChild(child);
784 		}
785 	}
786 }
787 
788 
789 
790 /*!
791  * \brief convert a list of integer in one string with a textual representation of said integers
792  *
793  * The numbers are given as text, separated by commas
794  * \param ints list of integer
795  * \return string containg a textual list of integers
796  */
intListToStr(const QList<int> & ints)797 QString intListToStr(const QList<int> &ints)
798 {
799 	QString s = "";
800 	foreach (int i, ints) {
801 		s.append(QString::number(i) + ',');
802 	}
803 	if (s.length() > 0)
804 		s.remove(s.length() - 1, 1); // remove last ','
805 	return s;
806 }
807 
strToIntList(const QString & s)808 QList<int> strToIntList(const QString &s)
809 {
810 	QList<int> ints;
811 	bool ok;
812 	foreach (const QString &si, s.split(',')) {
813 		int i = si.toInt(&ok);
814 		if (ok) ints << i;
815 	}
816 	return ints;
817 }
818 
/*! Wraps \a s in double quotes; double quotes inside of \a s are escaped as \". */
QString enquoteStr(const QString &s)
{
	QString quoted = s;
	quoted.replace('"', "\\\"");
	return quoted.prepend('"').append('"');
}
827 
/*!
 * Reverts enquoteStr(): removes a trailing double quote (unless it is an
 * escaped \"), removes a leading double quote and unescapes every \" to ".
 */
QString dequoteStr(const QString &s)
{
	QString res = s;
	const bool hasClosingQuote = res.endsWith('"') && !res.endsWith("\\\"");
	if (hasClosingQuote)
		res.chop(1);
	if (res.startsWith('"'))
		res = res.mid(1);
	res.replace("\\\"", "\"");
	return res;
}
838 
839 /** add a quotation around the string if it does not already have one. **/
quotePath(const QString & s)840 QString quotePath(const QString &s)
841 {
842 	if (s.startsWith('"') || !s.contains(' ')) return QString(s);
843 	return QString("\"%1\"").arg(s);
844 }
845 
846 /** if the string is surrounded by qoutes, remove these **/
removeQuote(const QString & s)847 QString removeQuote(const QString &s)
848 {
849 	if (s.length() >= 2 && s.startsWith('"') && s.endsWith('"')) {
850 		return s.mid(1, s.length() - 2);
851 	}
852 	return s;
853 }
854 
/*! Removes one trailing path delimiter (/ or \) from \a s, if present. */
QString removePathDelim(const QString &s)
{
	// we use the explicit chars intentionally and not QDir::separator()
	// because it shall also work for / on windows (many paths are internally
	// represented with / as delimiter
	const bool endsWithDelim = s.endsWith('/') || s.endsWith('\\');
	if (!endsWithDelim) return s;

	QString stripped = s;
	stripped.chop(1);
	return stripped;
}
865 
/*!
 * Replaces the accented and special letters listed below by unaccented ASCII
 * letters (some by two letters, e.g. Æ -> AE). All other characters are copied
 * unchanged.
 */
QString removeAccents(const QString &s) {
	// both tables are index-aligned: the n-th char of the first one maps to the n-th entry of the second one
	const QString diacriticLetters = QString::fromUtf8("ŠŒŽšœžŸ¥µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿ");
	const QStringList noDiacriticLetters = QStringList() << "S"<<"OE"<<"Z"<<"s"<<"oe"<<"z"<<"Y"<<"Y"<<"u"<<"A"<<"A"<<"A"<<"A"<<"A"<<"A"<<"AE"<<"C"<<"E"<<"E"<<"E"<<"E"<<"I"<<"I"<<"I"<<"I"<<"D"<<"N"<<"O"<<"O"<<"O"<<"O"<<"O"<<"O"<<"U"<<"U"<<"U"<<"U"<<"Y"<<"s"<<"a"<<"a"<<"a"<<"a"<<"a"<<"a"<<"ae"<<"c"<<"e"<<"e"<<"e"<<"e"<<"i"<<"i"<<"i"<<"i"<<"o"<<"n"<<"o"<<"o"<<"o"<<"o"<<"o"<<"o"<<"u"<<"u"<<"u"<<"u"<<"y"<<"y";

	QString output = "";
	for (const QChar &c : s) {
		const int tableIndex = diacriticLetters.indexOf(c);
		if (tableIndex >= 0)
			output.append(noDiacriticLetters.at(tableIndex));
		else
			output.append(c);
	}

	return output;
}
884 
/*!
 * Derives a label from \a s: accents are removed, the text is normalized
 * (form KD) and lower-cased, spaces become '-', and finally every character
 * other than a-z, 0-9 and '-' is dropped.
 */
QString makeLatexLabel(const QString &s) {
	QString label = removeAccents(s);
	label = label.normalized(QString::NormalizationForm_KD);
	label = label.toLower();
	label.replace(' ', '-');
	label.remove(QRegularExpression("[^a-z0-9\\-]"));
	return label;
}
891 
892 /*! Splits a command string into the command an arguments.
893  *  This respects quoted arguments. Output redirection operators are separate tokens
894  */
tokenizeCommandLine(const QString & commandLine)895 QStringList tokenizeCommandLine(const QString &commandLine) {
896 
897     QStringList result;
898 	QString currentToken = "";
899 	currentToken.reserve(30);
900 	bool inQuote = false;
901 	bool escape= false;
902 
903     #define FLUSH(value)        \
904         if(!(value).isEmpty())  \
905             result << (value);  \
906                                 \
907         (value) = "";
908 
909 	foreach (const QChar &c, commandLine) {
910 		if (c.isSpace()) {
911 			if (inQuote) {
912 				currentToken.append(c);
913 			} else {
914                 FLUSH(currentToken)
915 			}
916 		} else if (c == '\\') {
917 			escape = !escape;
918 			currentToken.append(c);
919 			continue;
920 		} else if (c == '"') {
921 			if (!escape) inQuote = !inQuote;
922 			currentToken.append(c);
923 		} else if (c == '>') {
924 			if (inQuote) {
925 				currentToken.append(c);
926 			} else if (currentToken == "2"){
927 				currentToken.append(c);
928                 FLUSH(currentToken)
929 			} else {
930                 FLUSH(currentToken)
931 				currentToken = c;
932                 FLUSH(currentToken)
933 			}
934 		} else {
935 			currentToken.append(c);
936 		}
937 		escape = false;
938 	}
939 
940     FLUSH(currentToken)
941 
942     #undef FLUSH
943 
944 	return result;
945 }
946 
/*!
 * Separates output redirections from an argument list.
 *
 * ">" followed by a further argument, or an argument starting with ">",
 * sets \a stdOut; "2>" followed by a further argument, or an argument
 * starting with "2>", sets \a stdErr. \a stdOut and \a stdErr are left
 * untouched if no matching redirection is found.
 *
 * \return the arguments that precede the first redirection. Ordinary
 * arguments found after a redirection are not returned.
 */
QStringList extractOutputRedirection(const QStringList &commandArgs, QString &stdOut, QString &stdErr) {
	QStringList plainArgs;
	bool redirectionSeen = false;
	for (int idx = 0; idx < commandArgs.length(); ++idx) {
		const QString &arg = commandArgs[idx];
		const bool hasTarget = (idx + 1 < commandArgs.length());

		if (arg == ">" && hasTarget) {
			// operator and target are two separate arguments
			stdOut = commandArgs[++idx];
			redirectionSeen = true;
		} else if (arg.startsWith(">")) {
			stdOut = arg.mid(1);
			redirectionSeen = true;
		} else if (arg == "2>" && hasTarget) {
			stdErr = commandArgs[++idx];
			redirectionSeen = true;
		} else if (arg.startsWith("2>")) {
			stdErr = arg.mid(2);
			redirectionSeen = true;
		} else if (!redirectionSeen) {
			plainArgs << arg;
		}
	}
	return plainArgs;
}
972 
973 
joinUnicodeSurrogate(const QChar & highSurrogate,const QChar & lowSurrogate)974 uint joinUnicodeSurrogate(const QChar &highSurrogate, const QChar &lowSurrogate)
975 {
976 	uint uhigh = highSurrogate.unicode();
977 	uint ulow = lowSurrogate.unicode();
978 	uint code = 0x10000;
979 	code += (uhigh & 0x03FF) << 10;
980 	code += (ulow & 0x03FF);
981 	return code;
982 }
983 
984 
getImageAsText(const QPixmap & AImage,const int w)985 QString getImageAsText(const QPixmap &AImage, const int w)
986 {
987 	QByteArray ba;
988 	QBuffer buffer(&ba);
989 	buffer.open(QIODevice::WriteOnly);
990 	AImage.save(&buffer, "PNG");
991 	QString text = w < 0 ? QString("<img src=\"data:image/png;base64,%1\">").arg(QString(buffer.data().toBase64())) : QString("<img src=\"data:image/png;base64,%1\" width=%2 >").arg(QString(buffer.data().toBase64())).arg(w);
992 	return text;
993 }
994 
995 /*!
996  * Shows a tooltip at the given position (pos = top left corner).
997  * If the tooltip does not fit on the screen, it's attempted to position it to the left including
998  * a possible relatedWidgetWidth offset (pos - relatedWidgetWidth = top right corner).
999  * If there is not enough space as well the text is shown in the position (left/right) of maxium
1000  * available space and the text lines are shortend to fit the available space.
1001  */
showTooltipLimited(QPoint pos,QString text,int relatedWidgetWidth)1002 void showTooltipLimited(QPoint pos, QString text, int relatedWidgetWidth)
1003 {
1004 	text.replace("\t", "    "); //if there are tabs at the position in the string, qt crashes. (13707)
1005 	QRect screen = UtilsUi::getAvailableGeometryAt(pos);
1006 	// estimate width of coming tooltip
1007 	// rather dirty code
1008 	bool textWillWarp = Qt::mightBeRichText(text);
1009     QLabel lLabel(nullptr, Qt::ToolTip);
1010 	lLabel.setFont(QToolTip::font());
1011     lLabel.setMargin(1 + lLabel.style()->pixelMetric(QStyle::PM_ToolTipLabelFrameWidth, nullptr, &lLabel));
1012 	lLabel.setFrameStyle(QFrame::StyledPanel);
1013 	lLabel.setAlignment(Qt::AlignLeft);
1014 	lLabel.setIndent(1);
1015 	lLabel.setWordWrap(textWillWarp);
1016 	lLabel.ensurePolished();
1017 	lLabel.setText(text);
1018 	lLabel.adjustSize();
1019 	int textWidthInPixels = lLabel.width() + 10; // +10 good guess
1020 
1021 	if (pos.x() - screen.x() + textWidthInPixels <= screen.width()) {
1022 		// tooltip fits at the requested position
1023 		QToolTip::showText(pos, text);
1024 	} else {
1025 		// try positioning the tooltip left of the releated widget
1026 		QPoint posLeft(pos.x() - textWidthInPixels - relatedWidgetWidth, pos.y());
1027 		if (posLeft.x() >= screen.x()) {
1028 			QToolTip::showText(posLeft, text);
1029 		} else {
1030 			// text does not fit to the left
1031 			// choose the position left/right with the maximum available space
1032 			int availableWidthLeft = (pos.x() - screen.x()) - relatedWidgetWidth;
1033 			int availableWidthRight = screen.width() - (pos.x() - screen.x());
1034 			int availableWidth = qMax(availableWidthLeft, availableWidthRight);
1035 			bool positionLeft = availableWidthLeft > availableWidthRight;
1036 			if (!textWillWarp) {
1037 				// shorten text lines to fit textwidth (only feasible if the tooltip does not wrap)
1038 				QStringList lines = text.split("\n");
1039 				int maxLength = 0;
1040 				QString maxLine;
1041 				foreach (const QString line, lines) {
1042 					if (line.length() > maxLength) {
1043 						maxLength = line.length();
1044 						maxLine = line;
1045 					}
1046 				}
1047 				int averageWidth = lLabel.fontMetrics().averageCharWidth();
1048                 if(averageWidth>1){
1049                     maxLength = qMin(maxLength, availableWidth / averageWidth);
1050                 }
1051 				while (textWidthInPixels > availableWidth && maxLength > 10) {
1052 					maxLength -= 2;
1053 					for (int i = 0; i < lines.count(); i++) {
1054 						lines[i] = lines[i].left(maxLength);
1055 					}
1056 					lLabel.setText(lines.join("\n"));
1057 					lLabel.adjustSize();
1058 					textWidthInPixels = lLabel.width() + 10;
1059 				}
1060 				text = lines.join("\n");
1061 			}
1062 			if (positionLeft) {
1063 				posLeft.setX(pos.x() - textWidthInPixels - relatedWidgetWidth);
1064 				QToolTip::showText(posLeft, text);
1065 			} else {
1066 				QToolTip::showText(pos, text);
1067 			}
1068 		}
1069 	}
1070 }
1071 
/*!
 * Limits \a s to its first \a maxLines lines.
 *
 * The text is cut directly after the maxLines-th newline and "..." is
 * appended as truncation marker. If the cut position is the end of the
 * string (nothing would be removed), or if \a s contains fewer newlines,
 * \a s is returned unchanged.
 */
QString truncateLines(const QString &s, int maxLines)
{
	int lineCount = 0;
	for (int i = 0; i < s.length(); i++) {
		if (s[i] == '\n') lineCount++;
		if (lineCount >= maxLines) {
			// nothing follows the cut position: the text already fits,
			// so do not append a marker for a truncation that did not happen
			if (i + 1 >= s.length()) return s;
			return s.left(i + 1) + "...";
		}
	}
	return s;
}
1083 
1084 /*
1085  * Utility function for most recent strings, e.g. for filenames
1086  * The item is inserted at the front and removed if present in the rest of the list.
1087  * The list will not get longer than maxLength.
1088  * Returns true if the list contents changed (i.e. item was not already in first place)
1089  */
addMostRecent(const QString & item,QStringList & mostRecentList,int maxLength)1090 bool addMostRecent(const QString &item, QStringList &mostRecentList, int maxLength)
1091 {
1092 	int p = mostRecentList.indexOf(item);
1093 	bool changed = (p != 0);
1094 	if (!changed) return changed;
1095 
1096 	if (p > 0) mostRecentList.removeAt(p);
1097 	mostRecentList.prepend(item);
1098 	if (mostRecentList.count() > maxLength) mostRecentList.removeLast();
1099 	return changed;
1100 }
1101 
1102