1# This file is part of ReText
2# Copyright: 2012-2021 Dmitry Shachnev
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 2 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17from ReText.editor import getColor
18from enum import IntFlag, auto
19import re
20
21from PyQt5.QtCore import Qt
22from PyQt5.QtGui import QFont, QSyntaxHighlighter, QTextCharFormat
23
# Pre-compiled patterns used by the highlighter below.

# HTML fragments
reHtmlTags = re.compile('<[^<>@]*>')
reHtmlSymbols = re.compile(r'&#?\w+;')
reHtmlStrings = re.compile('"[^"<]*"(?=[^<]*>)')
reHtmlComments = re.compile('<!--[^<>]*-->')

# Emphasis written with one, two or three asterisks / underscores
reAsterisks = re.compile(r'(?<!\*)\*[^ \*][^\*]*\*')
reUnderline = re.compile(r'(?<!_|\w)_[^_]+_(?!\w)')
reDblAsterisks = re.compile(r'(?<!\*)\*\*((?!\*\*).)*\*\*')
reDblUnderline = re.compile(r'(?<!_|\w)__[^_]+__(?!\w)')
reTrpAsterisks = re.compile(r'\*{3,3}[^\*]+\*{3,3}')
reTrpUnderline = re.compile('___[^_]+___')

# Markdown headers, links and block quotes
reMkdHeaders = re.compile('^#.+')
reMkdLinksImgs = re.compile(r'(?<=\[)[^\[\]]*(?=\])')
reMkdLinkRefs = re.compile(r'(?<=\]\()[^\(\)]*(?=\))')
reBlockQuotes = re.compile('^ *>.+')

# reStructuredText directives, roles, links and field lists
reReSTDirects = re.compile(r'\.\. [a-z]+::')
reReSTRoles = re.compile('(:[a-z-]+:)(`.+?`)')
reReSTLinks = re.compile('(`.+?<)(.+?)(>`__?)')
reReSTLinkRefs = re.compile(r'\.\. _`?(.*?)`?: (.*)')
reReSTFldLists = re.compile('^ *:(.*?):')

# Textile headers and block quotes
reTextileHdrs = re.compile(r'^h[1-6][()<>=]*\.\s.+')
reTextileQuot = re.compile(r'^bq\.\s.+')

# Code and math spans
reMkdCodeSpans = re.compile('`[^`]*`')
reMkdMathSpans = re.compile(r'\\[\(\[].*?\\[\)\]]')
reReSTCodeSpan = re.compile('``.+?``')

# Runs of word characters other than underscore (fed to the spell checker)
reWords = re.compile('[^_\\W]+')
# Whitespace at the very end of the text
reSpacesOnEnd = re.compile(r'\s+$')
50
51
class Formatter:
	"""Ordered list of callables that each modify a character format object.

	Formatters are combined with ``|``; the combined formatter runs the left
	operand's callables first, followed by the right operand's.
	"""

	def __init__(self, funcs=None):
		"""Create a formatter.

		funcs -- optional iterable of one-argument callables; each one is
		         called with the character format object passed to format().

		The iterable is copied, so mutating it afterwards does not change
		this formatter, and any iterable (not only a list) is accepted.
		"""
		self._funcs = list(funcs or ())

	def __or__(self, other):
		"""Return a new formatter extended by ``other``; operands are not modified.

		other -- another Formatter (its callables are appended) or a
		         QFont.Weight value (a callable setting that font weight is
		         appended). An operand of any other type adds nothing and a
		         plain copy of this formatter is returned.
		"""
		result = Formatter(self._funcs)
		if isinstance(other, Formatter):
			result._funcs.extend(other._funcs)
		elif isinstance(other, QFont.Weight):
			result._funcs.append(lambda f: f.setFontWeight(other))
		return result

	def format(self, charFormat):
		"""Call every stored callable, in order, with ``charFormat``."""
		for func in self._funcs:
			func(charFormat)
67
# Formatter with no callables: leaves the character format untouched.
NF = Formatter()
# Formatter that switches italics on.
ITAL = Formatter([lambda charFormat: charFormat.setFontItalic(True)])
# Formatter that switches underlining on.
UNDL = Formatter([lambda charFormat: charFormat.setFontUnderline(True)])
71
def FG(colorName):
	"""Return a Formatter that sets the foreground to the named color.

	colorName is passed to getColor() each time the formatter is applied,
	not when FG() itself is called.
	"""
	def applyForeground(charFormat):
		charFormat.setForeground(getColor(colorName))

	return Formatter([applyForeground])
75
def QString_length(text):
	"""Return the length ``text`` would have as a QString.

	A QString stores characters outside the Basic Multilingual Plane as a
	surrogate pair of two QChars, while a Python string counts each of them
	as a single code point. Every such character therefore adds one extra
	unit on top of len(text).
	"""
	astralCount = sum(1 for char in text if ord(char) > 0xFFFF)
	return len(text) + astralCount
81
82
class Markup(IntFlag):
	"""Bit flags naming the markup languages a highlighting rule applies to."""

	Mkd = 1 << 0
	ReST = 1 << 1
	Textile = 1 << 2
	HTML = 1 << 3

	# Special value which means that no other markup is allowed inside this pattern
	CodeSpan = 1 << 4
91
92
# Maps a document type name to the Markup flag whose rules highlight it.
docTypesMapping = dict(
	Markdown=Markup.Mkd,
	reStructuredText=Markup.ReST,
	Textile=Markup.Textile,
	html=Markup.HTML,
)
99
100
class ReTextHighlighter(QSyntaxHighlighter):
	"""Highlights markup syntax, trailing whitespace and misspelled words.

	Behaviour is controlled by two attributes that default to None and are
	expected to be assigned by the owner of the highlighter.
	"""

	# Spell-checking dictionary. Only its check(word) method is used here;
	# spell checking is skipped while this is falsy.
	dictionary = None
	# Key of docTypesMapping selecting which markup rules apply. Any other
	# value (including None) disables markup highlighting.
	docType = None

	# Each row is (regex, formatter, [formatter, ...], markups).
	# The first formatter is applied to the whole match and every following
	# one to the capture group with the same index. Rows flagged with
	# Markup.CodeSpan must come first: their matches are recorded and later
	# rows skip any match that overlaps one of them.
	patterns = (
		# regex,         color,                                markups
		(reMkdCodeSpans, FG('codeSpans'),                      Markup.Mkd | Markup.CodeSpan),
		(reMkdMathSpans, FG('codeSpans'),                      Markup.Mkd | Markup.CodeSpan),
		(reReSTCodeSpan, FG('codeSpans'),                      Markup.ReST | Markup.CodeSpan),
		(reHtmlTags,     FG('htmlTags') | QFont.Weight.Bold,   Markup.Mkd | Markup.Textile | Markup.HTML),
		(reHtmlSymbols,  FG('htmlSymbols') | QFont.Weight.Bold, Markup.Mkd | Markup.HTML),
		(reHtmlStrings,  FG('htmlStrings') | QFont.Weight.Bold, Markup.Mkd | Markup.HTML),
		(reHtmlComments, FG('htmlComments'),                   Markup.Mkd | Markup.HTML),
		(reAsterisks,    ITAL,                                 Markup.Mkd | Markup.ReST),
		(reUnderline,    ITAL,                                 Markup.Mkd | Markup.Textile),
		(reDblAsterisks, NF | QFont.Weight.Bold,               Markup.Mkd | Markup.ReST | Markup.Textile),
		(reDblUnderline, NF | QFont.Weight.Bold,               Markup.Mkd),
		(reTrpAsterisks, ITAL | QFont.Weight.Bold,             Markup.Mkd),
		(reTrpUnderline, ITAL | QFont.Weight.Bold,             Markup.Mkd),
		(reMkdHeaders,   NF | QFont.Weight.Black,              Markup.Mkd),
		(reMkdLinksImgs, FG('markdownLinks'),                  Markup.Mkd),
		(reMkdLinkRefs,  ITAL | UNDL,                          Markup.Mkd),
		(reBlockQuotes,  FG('blockquotes'),                    Markup.Mkd),
		(reReSTDirects,  FG('restDirectives') | QFont.Weight.Bold, Markup.ReST),
		(reReSTRoles,    NF, FG('restRoles') | QFont.Weight.Bold, FG('htmlStrings'), Markup.ReST),
		(reTextileHdrs,  NF | QFont.Weight.Black,              Markup.Textile),
		(reTextileQuot,  FG('blockquotes'),                    Markup.Textile),
		(reAsterisks,    NF | QFont.Weight.Bold,               Markup.Textile),
		(reDblUnderline, ITAL,                                 Markup.Textile),
		(reReSTLinks,    NF, NF, ITAL | UNDL, NF,              Markup.ReST),
		(reReSTLinkRefs, NF, FG('markdownLinks'), ITAL | UNDL, Markup.ReST),
		(reReSTFldLists, NF, FG('restDirectives'),             Markup.ReST),
	)

	def highlightBlock(self, text):
		"""Apply character formats to one block of text.

		text -- the block's text as a Python string.

		Three passes run in order: markup rules from ``patterns`` (only when
		docType is a known document type), a background color for trailing
		whitespace, and spell-check underlining (only when a dictionary is
		set). Regex offsets count code points, so every position and length
		handed to Qt is first converted with QString_length().
		"""
		# Syntax highlighter
		codeSpans = set()
		if self.docType in docTypesMapping:
			markup = docTypesMapping[self.docType]
			for pattern, *formatters, markups in self.patterns:
				if not (markup & markups):
					continue
				for match in pattern.finditer(text):
					start, end = match.start(), match.end()
					if markups & Markup.CodeSpan:
						codeSpans.add((start, end))
					elif any(start < codeEnd and end > codeStart
					         for codeStart, codeEnd in codeSpans):
						# Ignore any syntax if its match intersects with code spans.
						# See https://github.com/retext-project/retext/issues/529
						continue
					# Formatter number i styles capture group i (0 = whole match).
					for i, formatter in enumerate(formatters):
						charFormat = QTextCharFormat()
						formatter.format(charFormat)
						self.setFormat(QString_length(text[:match.start(i)]),
						               QString_length(match.group(i)),
						               charFormat)
		for match in reSpacesOnEnd.finditer(text):
			charFormat = QTextCharFormat()
			charFormat.setBackground(getColor('whitespaceOnEnd'))
			self.setFormat(QString_length(text[:match.start()]),
			               QString_length(match.group(0)),
			               charFormat)
		# Spell checker
		if self.dictionary:
			charFormat = QTextCharFormat()
			charFormat.setUnderlineColor(Qt.GlobalColor.red)
			charFormat.setUnderlineStyle(QTextCharFormat.UnderlineStyle.SpellCheckUnderline)
			for match in reWords.finditer(text):
				word = match.group(0)
				if self.dictionary.check(word):
					continue
				# Convert the offset once and use it for both the lookup and
				# the update: passing the raw code-point index to format()
				# would read the wrong position after any non-BMP character.
				position = QString_length(text[:match.start()])
				finalFormat = QTextCharFormat()
				finalFormat.merge(charFormat)
				# Merge the format already set at the word's first character
				# on top of the spell-check underline.
				finalFormat.merge(self.format(position))
				self.setFormat(position, QString_length(word), finalFormat)
177