#  Copyright 2008-2015 Nokia Networks
#  Copyright 2016-     Robot Framework Foundation
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""Formatters converting a lightweight text markup into HTML.

``HtmlFormatter`` splits text into blocks (tables, preformatted text, lists,
headers, rulers and paragraphs), ``LineFormatter`` handles inline markup
(bold, italic, code) and ``LinkFormatter`` turns URLs and ``[link|content]``
constructs into anchors and images.
"""

import re
from functools import partial
from itertools import cycle


class LinkFormatter(object):
    """Turn plain URLs and ``[link|content]`` markup into HTML links/images."""

    # File name suffixes (compared case-insensitively) rendered as <img>.
    _image_exts = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')
    # Custom link syntax: ``[target|content]``; group 1 is ``target|content``.
    _link = re.compile(r'\[(.+?\|.*?)\]')
    _url = re.compile(r'''
((^|\ ) ["'\(\[]*)           # begin of line or space and opt. any char "'([
([a-z][\w+-.]*://[^\s|]+?)   # url
(?=[\]\)|"'.,!?:;]* ($|\ ))  # opt. any char ])"'.,!?:; and end of line or space
''', re.VERBOSE | re.MULTILINE | re.IGNORECASE)

    def format_url(self, text):
        """Replace URLs in ``text`` with anchors, never with images."""
        return self._format_url(text, format_as_image=False)

    def _format_url(self, text, format_as_image=True):
        """Replace URLs in ``text`` with anchors, or images if allowed.

        With ``format_as_image`` true, URLs ending with one of the image
        extensions become ``<img>`` elements instead of ``<a>`` elements.
        """
        # Cheap pre-check so that the regexp is run only when it can match.
        if '://' not in text:
            return text
        return self._url.sub(partial(self._replace_url, format_as_image), text)

    def _replace_url(self, format_as_image, match):
        """Build the replacement for one ``_url`` match.

        Group 1 is the prefix (space and opening punctuation) that must be
        preserved, group 3 is the URL itself.
        """
        pre = match.group(1)
        url = match.group(3)
        if format_as_image and self._is_image(url):
            return pre + self._get_image(url)
        return pre + self._get_link(url)

    def _get_image(self, src, title=None):
        """Return an ``<img>`` element; ``title`` defaults to ``src``."""
        return '<img src="%s" title="%s">' % (self._quot(src),
                                              self._quot(title or src))

    def _get_link(self, href, content=None):
        """Return an ``<a>`` element; ``content`` defaults to ``href``.

        ``content`` is inserted as-is because it may already contain HTML
        (for example an ``<img>`` element created by ``_format_link``).
        """
        return '<a href="%s">%s</a>' % (self._quot(href), content or href)

    def _quot(self, attr):
        """Escape double quotes so ``attr`` is safe inside ``"..."``."""
        # The value is embedded in a double-quoted HTML attribute; a raw
        # quote would terminate the attribute early.
        return attr if '"' not in attr else attr.replace('"', '&quot;')

    def format_link(self, text):
        """Format both ``[link|content]`` markup and plain URLs in ``text``."""
        # Splitting with a capturing group yields alternating tokens:
        # 2nd, 4th, etc. token contains link, others surrounding content.
        tokens = self._link.split(text)
        formatters = cycle((self._format_url, self._format_link))
        return ''.join(f(t) for f, t in zip(formatters, tokens))

    def _format_link(self, text):
        """Format the inside of a ``[link|content]`` construct.

        - If ``content`` is an image name, an image (titled with the link)
          wrapped in an anchor to ``link`` is returned.
        - Otherwise, if ``link`` is an image name, a plain image with
          ``content`` as its title is returned.
        - Otherwise an anchor to ``link`` with ``content`` as its text.
        """
        link, content = [t.strip() for t in text.split('|', 1)]
        if self._is_image(content):
            content = self._get_image(content, link)
        elif self._is_image(link):
            return self._get_image(link, content)
        return self._get_link(link, content)

    def _is_image(self, text):
        """Return True if ``text`` ends with a known image extension."""
        return text.lower().endswith(self._image_exts)


class LineFormatter(object):
    """Apply inline markup (bold, italic, code, links) to a single line."""

    newline = '\n'
    _bold = re.compile(r'''
(                         # prefix (group 1)
  (^|\ )                  # begin of line or space
  ["'(]* _?               # optionally any char "'( and optional begin of italic
)                         #
\*                        # start of bold
([^\ ].*?)                # no space and then anything (group 3)
\*                        # end of bold
(?=                       # start of postfix (non-capturing group)
  _? ["').,!?:;]*         # optional end of italic and any char "').,!?:;
  ($|\ )                  # end of line or space
)
''', re.VERBOSE)
    _italic = re.compile(r'''
( (^|\ ) ["'(]* )          # begin of line or space and opt. any char "'(
_                          # start of italic
([^\ _].*?)                # no space or underline and then anything
_                          # end of italic
(?= ["').,!?:;]* ($|\ ) )  # opt. any char "').,!?:; and end of line or space
''', re.VERBOSE)
    _code = re.compile(r'''
( (^|\ ) ["'(]* )          # same as above with _ changed to ``
``
([^\ `].*?)
``
(?= ["').,!?:;]* ($|\ ) )
''', re.VERBOSE)

    def __init__(self):
        # (marker, formatter) pairs applied in order. A formatter is run only
        # if its marker occurs in the line; the empty marker is contained in
        # every string so link formatting is always attempted.
        self._formatters = [('*', self._format_bold),
                            ('_', self._format_italic),
                            ('``', self._format_code),
                            ('', LinkFormatter().format_link)]

    def handles(self, line):
        """Accept every line."""
        return True

    def format(self, line):
        """Return ``line`` with inline markup converted to HTML."""
        for marker, formatter in self._formatters:
            if marker in line:
                line = formatter(line)
        return line

    def _format_bold(self, line):
        """Convert ``*text*`` to ``<b>text</b>``."""
        return self._bold.sub('\\1<b>\\3</b>', line)

    def _format_italic(self, line):
        """Convert ``_text_`` to ``<i>text</i>``."""
        return self._italic.sub('\\1<i>\\3</i>', line)

    def _format_code(self, line):
        """Convert double-backtick quoted text to ``<code>text</code>``."""
        return self._code.sub('\\1<code>\\3</code>', line)


class HtmlFormatter(object):
    """Convert multi-line marked-up text to HTML, one block at a time."""

    def __init__(self):
        self._results = []
        # Order matters: the first formatter that handles a line wins.
        self._formatters = [TableFormatter(),
                            PreformattedFormatter(),
                            ListFormatter(),
                            HeaderFormatter(),
                            RulerFormatter()]
        # The paragraph formatter gets a copy of the list above and handles
        # exactly the lines that none of those formatters handle.
        self._formatters.append(ParagraphFormatter(self._formatters[:]))
        self._current = None

    def format(self, text):
        """Return ``text`` converted to HTML, blocks joined with newlines."""
        # Start from a clean slate so that reusing the same instance does not
        # prepend the output of earlier calls.
        self._results = []
        for line in text.splitlines():
            self._process_line(line)
        self._end_current()
        return '\n'.join(self._results)

    def _process_line(self, line):
        """Route ``line`` to the current block or start a new block."""
        if not line.strip():
            # Empty line always terminates the current block.
            self._end_current()
        elif self._current and self._current.handles(line):
            self._current.add(line)
        else:
            self._end_current()
            self._current = self._find_formatter(line)
            self._current.add(line)

    def _end_current(self):
        """Finish the active block, if any, and store its HTML."""
        if self._current:
            self._results.append(self._current.end())
            self._current = None

    def _find_formatter(self, line):
        """Return the first formatter that handles ``line``."""
        for formatter in self._formatters:
            if formatter.handles(line):
                return formatter


class _Formatter(object):
    """Base class for block formatters that collect lines and format them."""

    # When true, lines are stripped before being matched and stored.
    _strip_lines = True

    def __init__(self):
        self._lines = []

    def handles(self, line):
        """Return a truthy value if this formatter accepts ``line``."""
        return self._handles(line.strip() if self._strip_lines else line)

    def _handles(self, line):
        raise NotImplementedError

    def add(self, line):
        """Add ``line`` to the block being collected."""
        self._lines.append(line.strip() if self._strip_lines else line)

    def end(self):
        """Format the collected lines, reset the buffer, return the HTML."""
        result = self.format(self._lines)
        self._lines = []
        return result

    def format(self, lines):
        raise NotImplementedError


class _SingleLineFormatter(_Formatter):
    """Base class for blocks that always consist of exactly one line."""

    def _handles(self, line):
        # Refusing lines while one is already buffered makes every matching
        # line its own block.
        return not self._lines and self.match(line)

    def match(self, line):
        raise NotImplementedError

    def format(self, lines):
        return self.format_line(lines[0])

    def format_line(self, line):
        raise NotImplementedError


class RulerFormatter(_SingleLineFormatter):
    """Format a line of three or more hyphens as ``<hr>``."""

    match = re.compile('^-{3,}$').match

    def format_line(self, line):
        return '<hr>'


class HeaderFormatter(_SingleLineFormatter):
    """Format ``= Header =`` style lines as ``<h2>``-``<h4>`` elements."""

    # One to three ``=`` on both sides, separated from the text by whitespace.
    match = re.compile(r'^(={1,3})\s+(\S.*?)\s+\1$').match

    def format_line(self, line):
        level, text = self.match(line).groups()
        # ``=`` maps to <h2>, ``==`` to <h3> and ``===`` to <h4>.
        level = len(level) + 1
        return '<h%d>%s</h%d>' % (level, text, level)


class ParagraphFormatter(_Formatter):
    """Format lines not handled by any other formatter as a paragraph."""

    _format_line = LineFormatter().format

    def __init__(self, other_formatters):
        _Formatter.__init__(self)
        self._other_formatters = other_formatters

    def _handles(self, line):
        return not any(other.handles(line)
                       for other in self._other_formatters)

    def format(self, lines):
        """Join the lines with spaces and wrap the result in ``<p>``."""
        return '<p>%s</p>' % self._format_line(' '.join(lines))


class TableFormatter(_Formatter):
    """Format consecutive ``| cell | cell |`` lines as an HTML table."""

    _table_line = re.compile(r'^\| (.* |)\|$')
    # Cell separator is `` |`` followed by a space (the space is kept).
    _line_splitter = re.compile(r' \|(?= )')
    _format_cell_content = LineFormatter().format

    def _handles(self, line):
        return self._table_line.match(line) is not None

    def format(self, lines):
        return self._format_table([self._split_to_cells(l) for l in lines])

    def _split_to_cells(self, line):
        """Split one table line to a list of stripped cell contents."""
        # ``line[1:-1]`` drops the leading and trailing pipe characters.
        return [cell.strip() for cell in self._line_splitter.split(line[1:-1])]

    def _format_table(self, rows):
        """Return HTML for ``rows``, padding short rows with empty cells."""
        maxlen = max(len(row) for row in rows)
        table = ['<table border="1">']
        for row in rows:
            row += [''] * (maxlen - len(row))    # fix ragged tables
            table.append('<tr>')
            table.extend(self._format_cell(cell) for cell in row)
            table.append('</tr>')
        table.append('</table>')
        return '\n'.join(table)

    def _format_cell(self, content):
        """Return ``<th>`` for ``=header=`` cells and ``<td>`` otherwise."""
        if content.startswith('=') and content.endswith('='):
            tx = 'th'
            content = content[1:-1].strip()
        else:
            tx = 'td'
        return '<%s>%s</%s>' % (tx, self._format_cell_content(content), tx)


class PreformattedFormatter(_Formatter):
    """Format lines starting with ``| `` as a ``<pre>`` block."""

    _format_line = LineFormatter().format

    def _handles(self, line):
        return line.startswith('| ') or line == '|'

    def format(self, lines):
        # ``line[2:]`` removes the ``| `` prefix; a bare ``|`` becomes empty.
        lines = [self._format_line(line[2:]) for line in lines]
        return '\n'.join(['<pre>'] + lines + ['</pre>'])


class ListFormatter(_Formatter):
    """Format ``- item`` lines as an unordered list."""

    # Leading whitespace is needed to recognize continuation lines.
    _strip_lines = False
    _format_item = LineFormatter().format

    def _handles(self, line):
        if line.strip().startswith('- '):
            return True
        # A line starting with a space continues the previous item, but only
        # when a list has already been started.
        return bool(self._lines) and line.startswith(' ')

    def format(self, lines):
        items = ['<li>%s</li>' % self._format_item(line)
                 for line in self._combine_lines(lines)]
        return '\n'.join(['<ul>'] + items + ['</ul>'])

    def _combine_lines(self, lines):
        """Yield list items with continuation lines joined by spaces."""
        current = []
        for line in lines:
            line = line.strip()
            if not line.startswith('- '):
                current.append(line)
                continue
            if current:
                yield ' '.join(current)
            current = [line[2:].strip()]
        yield ' '.join(current)