# This file is part of ReText
# Copyright: 2012-2021 Dmitry Shachnev
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from ReText.editor import getColor
from enum import IntFlag, auto
import re

from PyQt5.QtCore import Qt
from PyQt5.QtGui import QFont, QSyntaxHighlighter, QTextCharFormat

reHtmlTags = re.compile('<[^<>@]*>')
reHtmlSymbols = re.compile(r'&#?\w+;')
reHtmlStrings = re.compile('"[^"<]*"(?=[^<]*>)')
reHtmlComments = re.compile('<!--[^<>]*-->')
reAsterisks = re.compile(r'(?<!\*)\*[^ \*][^\*]*\*')
reUnderline = re.compile(r'(?<!_|\w)_[^_]+_(?!\w)')
reDblAsterisks = re.compile(r'(?<!\*)\*\*((?!\*\*).)*\*\*')
reDblUnderline = re.compile(r'(?<!_|\w)__[^_]+__(?!\w)')
reTrpAsterisks = re.compile(r'\*{3,3}[^\*]+\*{3,3}')
reTrpUnderline = re.compile('___[^_]+___')
reMkdHeaders = re.compile('^#.+')
reMkdLinksImgs = re.compile(r'(?<=\[)[^\[\]]*(?=\])')
reMkdLinkRefs = re.compile(r'(?<=\]\()[^\(\)]*(?=\))')
reBlockQuotes = re.compile('^ *>.+')
reReSTDirects = re.compile(r'\.\. [a-z]+::')
reReSTRoles = re.compile('(:[a-z-]+:)(`.+?`)')
reReSTLinks = re.compile('(`.+?<)(.+?)(>`__?)')
reReSTLinkRefs = re.compile(r'\.\. _`?(.*?)`?: (.*)')
reReSTFldLists = re.compile('^ *:(.*?):')
reTextileHdrs = re.compile(r'^h[1-6][()<>=]*\.\s.+')
reTextileQuot = re.compile(r'^bq\.\s.+')
reMkdCodeSpans = re.compile('`[^`]*`')
reMkdMathSpans = re.compile(r'\\[\(\[].*?\\[\)\]]')
reReSTCodeSpan = re.compile('``.+?``')
reWords = re.compile('[^_\\W]+')
reSpacesOnEnd = re.compile(r'\s+$')


class Formatter:
    """An ordered list of callables, each of which mutates a char format.

    Formatters are combined with ``|``; the result applies the left-hand
    operand's callables first, then the right-hand operand's.
    """

    def __init__(self, funcs=None):
        self._funcs = funcs or []

    def __or__(self, other):
        """Return a new Formatter extended with ``other``.

        ``other`` may be another Formatter (its callables are appended) or a
        ``QFont.Weight`` (a callable setting that font weight is appended).
        Any other operand is ignored and a plain copy is returned.
        """
        result = Formatter(self._funcs.copy())
        if isinstance(other, Formatter):
            result._funcs.extend(other._funcs)
        elif isinstance(other, QFont.Weight):
            result._funcs.append(lambda f: f.setFontWeight(other))
        return result

    def format(self, charFormat):
        """Apply every stored callable, in order, to ``charFormat``."""
        for func in self._funcs:
            func(charFormat)


# "No formatting": an empty Formatter, also used as a base for `| weight`.
NF = Formatter()
ITAL = Formatter([lambda f: f.setFontItalic(True)])
UNDL = Formatter([lambda f: f.setFontUnderline(True)])


def FG(colorName):
    """Return a Formatter that sets the foreground to ``getColor(colorName)``.

    The color is looked up each time the formatter is applied, not when the
    formatter is created.
    """
    func = lambda f: f.setForeground(getColor(colorName))
    return Formatter([func])


def QString_length(text):
    """Return the length ``text`` would have as a QString."""
    # In QString, surrogate pairs are represented using multiple QChars,
    # so the length of QString is not always equal to the number of code
    # points in it (which is what the length of a Python string counts).
    return sum(2 if ord(char) > 65535 else 1 for char in text)


class Markup(IntFlag):
    """Bit flags naming the markup languages a highlighting pattern applies to."""

    Mkd = auto()
    ReST = auto()
    Textile = auto()
    HTML = auto()

    # Special value which means that no other markup is allowed inside this pattern
    CodeSpan = auto()


docTypesMapping = {
    'Markdown': Markup.Mkd,
    'reStructuredText': Markup.ReST,
    'Textile': Markup.Textile,
    'html': Markup.HTML,
}


class ReTextHighlighter(QSyntaxHighlighter):
    """Syntax highlighter with optional spell-check underlining.

    ``docType`` selects which entries of ``patterns`` are active (it is
    looked up in ``docTypesMapping``); ``dictionary``, when set, must provide
    a ``check(word)`` method and enables the spell checker.
    """

    dictionary = None
    docType = None

    # Each entry is (regex, formatter for group 0, [formatter for group 1,
    # ...], markups). Code span patterns come first so that their spans are
    # known before the remaining patterns are matched.
    patterns = (
        # regex, color, markups
        (reMkdCodeSpans, FG('codeSpans'), Markup.Mkd | Markup.CodeSpan),
        (reMkdMathSpans, FG('codeSpans'), Markup.Mkd | Markup.CodeSpan),
        (reReSTCodeSpan, FG('codeSpans'), Markup.ReST | Markup.CodeSpan),
        (reHtmlTags, FG('htmlTags') | QFont.Weight.Bold, Markup.Mkd | Markup.Textile | Markup.HTML),
        (reHtmlSymbols, FG('htmlSymbols') | QFont.Weight.Bold, Markup.Mkd | Markup.HTML),
        (reHtmlStrings, FG('htmlStrings') | QFont.Weight.Bold, Markup.Mkd | Markup.HTML),
        (reHtmlComments, FG('htmlComments'), Markup.Mkd | Markup.HTML),
        (reAsterisks, ITAL, Markup.Mkd | Markup.ReST),
        (reUnderline, ITAL, Markup.Mkd | Markup.Textile),
        (reDblAsterisks, NF | QFont.Weight.Bold, Markup.Mkd | Markup.ReST | Markup.Textile),
        (reDblUnderline, NF | QFont.Weight.Bold, Markup.Mkd),
        (reTrpAsterisks, ITAL | QFont.Weight.Bold, Markup.Mkd),
        (reTrpUnderline, ITAL | QFont.Weight.Bold, Markup.Mkd),
        (reMkdHeaders, NF | QFont.Weight.Black, Markup.Mkd),
        (reMkdLinksImgs, FG('markdownLinks'), Markup.Mkd),
        (reMkdLinkRefs, ITAL | UNDL, Markup.Mkd),
        (reBlockQuotes, FG('blockquotes'), Markup.Mkd),
        (reReSTDirects, FG('restDirectives') | QFont.Weight.Bold, Markup.ReST),
        (reReSTRoles, NF, FG('restRoles') | QFont.Weight.Bold, FG('htmlStrings'), Markup.ReST),
        (reTextileHdrs, NF | QFont.Weight.Black, Markup.Textile),
        (reTextileQuot, FG('blockquotes'), Markup.Textile),
        (reAsterisks, NF | QFont.Weight.Bold, Markup.Textile),
        (reDblUnderline, ITAL, Markup.Textile),
        (reReSTLinks, NF, NF, ITAL | UNDL, NF, Markup.ReST),
        (reReSTLinkRefs, NF, FG('markdownLinks'), ITAL | UNDL, Markup.ReST),
        (reReSTFldLists, NF, FG('restDirectives'), Markup.ReST),
    )

    def highlightBlock(self, text):
        """Apply syntax, trailing-whitespace and spell-check formats to ``text``.

        Every position and length handed to Qt is converted with
        ``QString_length``, because Python indexes strings by code point
        while a QString counts a non-BMP character as two units.
        """
        # Syntax highlighter
        codeSpans = set()
        if self.docType in docTypesMapping:
            markup = docTypesMapping[self.docType]
            for pattern, *formatters, markups in self.patterns:
                if not (markup & markups):
                    continue
                for match in pattern.finditer(text):
                    start, end = match.start(), match.end()
                    if markups & Markup.CodeSpan:
                        codeSpans.add((start, end))
                    elif any(start < codeEnd and end > codeStart
                             for codeStart, codeEnd in codeSpans):
                        # Ignore any syntax if its match intersects with code spans.
                        # See https://github.com/retext-project/retext/issues/529
                        continue
                    # The i-th formatter styles the i-th regex group
                    # (group 0 being the whole match).
                    for i, formatter in enumerate(formatters):
                        charFormat = QTextCharFormat()
                        formatter.format(charFormat)
                        self.setFormat(QString_length(text[:match.start(i)]),
                                       QString_length(match.group(i)),
                                       charFormat)
        for match in reSpacesOnEnd.finditer(text):
            charFormat = QTextCharFormat()
            charFormat.setBackground(getColor('whitespaceOnEnd'))
            self.setFormat(QString_length(text[:match.start()]),
                           QString_length(match.group(0)),
                           charFormat)
        # Spell checker
        if self.dictionary:
            charFormat = QTextCharFormat()
            charFormat.setUnderlineColor(Qt.GlobalColor.red)
            charFormat.setUnderlineStyle(QTextCharFormat.UnderlineStyle.SpellCheckUnderline)
            for match in reWords.finditer(text):
                word = match.group(0)
                if self.dictionary.check(word):
                    continue
                # Use the QString position for format() as well as for
                # setFormat(): with a raw Python index, a preceding non-BMP
                # character would make us read the format of the wrong
                # character.
                position = QString_length(text[:match.start()])
                finalFormat = QTextCharFormat()
                finalFormat.merge(charFormat)
                # Merge the format already set at this position so the
                # underline is added on top of the syntax highlighting.
                finalFormat.merge(self.format(position))
                self.setFormat(position, QString_length(word), finalFormat)