1 /*
2  note_edit_highlight.h     MindForger thinking notebook
3 
4  Copyright (C) 2016-2020 Martin Dvorak <martin.dvorak@mindforger.com>
5 
6  This program is free software; you can redistribute it and/or
7  modify it under the terms of the GNU General Public License
8  as published by the Free Software Foundation; either version 2
9  of the License, or (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "note_edit_highlight.h"
20 
21 namespace m8r {
22 
23 using namespace std;
24 
NoteEditHighlight(QTextDocument * parent)25 NoteEditHighlight::NoteEditHighlight(QTextDocument* parent)
26     : QSyntaxHighlighter(parent),
27       lookAndFeels(LookAndFeels::getInstance())
28 {
29     /*
30      * HTML inlined in MD - goes first so that formatting can be rewritten by MD
31      */
32 
33     addRegex(HtmlTag, "<[!?]?\\w+(?:/>)?", false);
34     addRegex(HtmlTag, "(?:</\\w+)?[?]?>");
35     addRegex(HtmlEntity, "&(:?#\\d+|\\w+);");
36     addRegex(HtmlComment, "<!--.*-->");
37     addRegex(HtmlAttribute, "(\\w+(?::\\w+)?)=(\"[^\"]+\"|'[^\']+')");
38 
39     htmlTagFormat.setForeground(lookAndFeels.getEditorHtmlTag());
40     htmlAttrNameFormat.setForeground(lookAndFeels.getEditorHtmlAttrName());
41     htmlAttValueFormat.setForeground(lookAndFeels.getEditorHtmlAttrValue());
42     htmlEntityFormat.setForeground(lookAndFeels.getEditorHtmlEntity());
43     htmlCommentFormat.setForeground(lookAndFeels.getEditorHtmlComment());
44     htmlCommentFormat.setFontItalic(true);
45 
46     /*
47      * Markdown (check QRegExp or Perl regexps)
48      */
49 
50     // regexps
51     addRegex(Bold, "\\*\\S[\\S\\s]+\\*",true);
52     addRegex(Bolder, "\\*\\*[\\S\\s]+\\*\\*");
53     addRegex(Italic, "_[\\S\\s]+_");
54     addRegex(Italicer, "__[\\S\\s]+\\__");
55     addRegex(Strikethrough, "~~[\\S\\s]+\\~~");
56     addRegex(Link, "\\[(:?[\\S\\s]+)\\]\\([\\S\\s]+\\)");
57     addRegex(Autolink, "https?://\\S+",false);
58     addRegex(Codeblock, "`[\\S\\s]+`");
59     addRegex(Mathblock, "\\$[\\S\\s]+\\$");
60     addRegex(UnorderedList, "^(:?    )*[\\*\\+\\-] ");
61     addRegex(OrderedList, "^(:?    )*\\d\\d?\\. ");
62     // IMPROVE highlight tasks (red/green) that overwrite lists , BUT new regexps make highlighting slower - is it worth to highlight it?
63 
64     // formats
65     boldFormat.setForeground(lookAndFeels.getEditorBold());
66     bolderFormat.setForeground(lookAndFeels.getEditorBolder());
67     italicFormat.setForeground(lookAndFeels.getEditorItalic());
68     italicFormat.setFontItalic(true);
69     italicerFormat.setForeground(lookAndFeels.getEditorItalicer());
70     italicerFormat.setFontItalic(true);
71     italicerFormat.setFontWeight(QFont::Bold);
72     strikethroughFormat.setForeground(lookAndFeels.getEditorStrikethrough());
73     linkFormat.setForeground(lookAndFeels.getEditorLink());
74     listFormat.setForeground(lookAndFeels.getEditorList());
75     codeBlockFormat.setForeground(lookAndFeels.getEditorCodeblock());
76     mathBlockFormat.setForeground(lookAndFeels.getEditorCodeblock());
77 
78 #if QT_VERSION > QT_VERSION_CHECK(5, 5, 0)
79     bolderFormat.setFontWeight(QFont::ExtraBold);
80     listFormat.setFontWeight(QFont::ExtraBold);
81 #else
82     bolderFormat.setFontWeight(QFont::Black);
83     listFormat.setFontWeight(QFont::Black);
84 #endif
85 }
86 
~NoteEditHighlight()87 NoteEditHighlight::~NoteEditHighlight()
88 {
89     for(auto& p:typeAndRegex) {
90         delete p;
91     }
92     typeAndRegex.clear();
93 }
94 
95 
96 /**
97  * @brief Add regexp for matching.
98  * @param minimal   controls non-greed vs greedy matching
99  *
100  * Add Qt's Perl compatible regexp - see QRegExp or https://perlmaven.com/regex-cheat-sheet
101  */
addRegex(Type type,const QString & pattern,bool minimal)102 void NoteEditHighlight::addRegex(Type type, const QString &pattern, bool minimal)
103 {
104     QRegExp* regex = new QRegExp{pattern};
105     regex->setPatternSyntax(QRegExp::RegExp2);
106     regex->setMinimal(minimal);
107 
108     std::pair<Type,QRegExp*>* p = new std::pair<Type,QRegExp*>(type,regex);
109     typeAndRegex.push_back(p);
110 }
111 
112 /**
113  * @brief This method is called for EACH line to highlight it.
114  *
115  * Multi-line highlighting is solved by maintaining a state as the
116  * whole document is being highlighted.
117  */
highlightBlock(const QString & text)118 void NoteEditHighlight::highlightBlock(const QString& text)
119 {
120     if(enabled) {
121         // clear format of the text
122         setCurrentBlockState(Normal);
123 
124         // when in MD code section, then there is no need to highlight anything
125         if(!highlightMultilineMdCode(text)) {
126             // highlight patterns defined using regexps
127             if(text.size()) highlightPatterns(text);
128             // eventually overwrite certain formatting with *multiline(s)* like MD code or HTML comments
129             highlightMultilineHtmlComments(text);
130         }
131     }
132 }
133 
134 /*
135  * This method get editor's text and it uses regexps to tokenize
136  * it. Then it assigns a format to every detected token using
137  * setFormat(offset,length) function.
138  */
highlightPatterns(const QString & text)139 void NoteEditHighlight::highlightPatterns(const QString& text)
140 {
141     // iterate all regexps - ORDER matters as latter regexps may OVERWRITE format of
142     // earlier regexps, e.g. consider bold rewritten by multiline code or bold rewriting
143     // bolder
144     // IMPROVE improve O(n) which is BIG and depends on the number of regexps:
145     //   O(n) = size(regexps) * lng(text)
146     for(auto p:typeAndRegex) {
147         Type type = p->first;
148         QRegExp* regex = p->second;
149 
150         // find 1st match for regex in text
151         int index = regex->indexIn(text);
152         // loop until there are other matches
153         while(index > -1) {
154             int length = regex->matchedLength();
155 
156             switch(type) {
157             case Bolder:
158                 setFormat(index, length, bolderFormat);
159                 break;
160             case Bold:
161                 setFormat(index, length, boldFormat);
162                 break;
163             case Italic:
164                 setFormat(index, length, italicFormat);
165                 break;
166             case Italicer:
167                 setFormat(index, length, italicerFormat);
168                 break;
169             case Strikethrough:
170                 setFormat(index, length, strikethroughFormat);
171                 break;
172             case Codeblock:
173                 setFormat(index, length, codeBlockFormat);
174                 break;
175             case Mathblock:
176                 setFormat(index, length, mathBlockFormat);
177                 break;
178             case Link:
179                 setFormat(index, length, linkFormat);
180                 break;
181             case Autolink:
182                 setFormat(index, length, linkFormat);
183                 break;
184             case UnorderedList:
185                 setFormat(index, length, listFormat);
186                 break;
187             case OrderedList:
188                 setFormat(index, length, listFormat);
189                 break;
190             case HtmlTag:
191                 setFormat(index, length, htmlTagFormat);
192                 break;
193             case HtmlAttribute:
194                 setFormat(
195                     index,
196                     regex->pos(2) - index - 1,
197                     htmlAttrNameFormat);
198                 setFormat(
199                     regex->pos(2) + 1,
200                     regex->cap(2).length() - 2,
201                     htmlAttValueFormat);
202                 break;
203             case HtmlEntity:
204                 setFormat(index, length, htmlEntityFormat);
205                 break;
206             case HtmlComment:
207                 // this is single line comment - multiline comments are matched by separate method
208                 setFormat(index, length, htmlCommentFormat);
209                 break;
210             }
211 
212             // match again
213             index = regex->indexIn(text, index+length);
214         }
215     }
216 }
217 
218 /**
219  * @brief Highlight MD multiline code and return true if the line has been formatted.
220  */
highlightMultilineMdCode(const QString & text)221 bool NoteEditHighlight::highlightMultilineMdCode(const QString &text)
222 {
223     static const QString TOKEN("```");
224 
225     if(previousBlockState()!=-1 && (previousBlockState()&InCode)==InCode) {
226         // already inside block
227         if(!text.compare(TOKEN)) {
228             // finish block ~ don't send anything
229             setFormat(0, TOKEN.length(), codeBlockFormat);
230             return true;
231         } else {
232             // continue block
233             setCurrentBlockState(currentBlockState()|InCode);
234             setFormat(0, text.size(), codeBlockFormat);
235             return true;
236         }
237     } else {
238         // outside block
239         if(text.startsWith(TOKEN)) {
240             // enter block ~ don't send anything
241             setCurrentBlockState(currentBlockState()|InCode);
242             setFormat(0, text.size(), codeBlockFormat);
243             return true;
244         } else {
245             setCurrentBlockState(Normal);
246             return false;
247         }
248     }
249 }
250 
highlightMultilineHtmlComments(const QString & text)251 void NoteEditHighlight::highlightMultilineHtmlComments(const QString &text)
252 {
253     static const QString BEGIN_TOKEN("<!--");
254     static const QString END_TOKEN("-->");
255 
256     if(previousBlockState() > -1 && (previousBlockState() & InComment) == InComment) {
257         int end = text.indexOf(END_TOKEN);
258         if (end == -1) {
259             setFormat(0, text.length(), htmlCommentFormat);
260             setCurrentBlockState(currentBlockState() | InComment);
261             return;
262         }
263         else {
264             setFormat(0, end + END_TOKEN.length(), htmlCommentFormat);
265         }
266     }
267 
268     int start = text.lastIndexOf(BEGIN_TOKEN);
269     if(start != -1) {
270         int end = text.lastIndexOf(END_TOKEN);
271         if(end < start) {
272             setFormat(start, text.length(), htmlCommentFormat);
273             setCurrentBlockState(currentBlockState() | InComment);
274         }
275     }
276 }
277 
278 } // m8r namespace
279