1#  Copyright 2008-2015 Nokia Networks
2#  Copyright 2016-     Robot Framework Foundation
3#
4#  Licensed under the Apache License, Version 2.0 (the "License");
5#  you may not use this file except in compliance with the License.
6#  You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10#  Unless required by applicable law or agreed to in writing, software
11#  distributed under the License is distributed on an "AS IS" BASIS,
12#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13#  See the License for the specific language governing permissions and
14#  limitations under the License.
15
16import re
17from functools import partial
18from itertools import cycle
19
20
class LinkFormatter(object):
    """Convert URLs and ``[link|content]`` markup in text to HTML.

    Plain URLs become ``<a>`` elements, or ``<img>`` elements when image
    formatting is enabled and the URL ends with a known image extension.
    """

    # Targets ending with one of these (compared in lower case) are images.
    _image_exts = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')
    # The patterns are raw strings so that the regex escapes they contain are
    # not interpreted as (invalid) Python string escape sequences.
    _link = re.compile(r'\[(.+?\|.*?)\]')
    _url = re.compile(r'''
((^|\ ) ["'\(\[]*)           # begin of line or space and opt. any char "'([
([a-z][\w+-.]*://[^\s|]+?)   # url
(?=[\]\)|"'.,!?:;]* ($|\ ))   # opt. any char ])"'.,!?:; and end of line or space
''', re.VERBOSE|re.MULTILINE|re.IGNORECASE)

    def format_url(self, text):
        """Return ``text`` with URLs turned into links, never into images."""
        return self._format_url(text, format_as_image=False)

    def _format_url(self, text, format_as_image=True):
        """Replace URLs in ``text`` with links, or with images if allowed."""
        # Cheap check that avoids running the regex on text without any URL.
        if '://' not in text:
            return text
        return self._url.sub(partial(self._replace_url, format_as_image), text)

    def _replace_url(self, format_as_image, match):
        """Build the replacement text for a single ``_url`` match."""
        # Group 1 is the prefix (line start or space plus opening quotes and
        # brackets) and must be kept; group 3 is the URL itself.
        pre = match.group(1)
        url = match.group(3)
        if format_as_image and self._is_image(url):
            return pre + self._get_image(url)
        return pre + self._get_link(url)

    def _get_image(self, src, title=None):
        """Return an ``<img>`` element; the title defaults to ``src``."""
        return '<img src="%s" title="%s">' \
                % (self._quot(src), self._quot(title or src))

    def _get_link(self, href, content=None):
        """Return an ``<a>`` element; the content defaults to ``href``."""
        return '<a href="%s">%s</a>' % (self._quot(href), content or href)

    def _quot(self, attr):
        """Replace double quotes so ``attr`` fits in a quoted attribute."""
        return attr.replace('"', '&quot;')

    def format_link(self, text):
        """Format ``[link|content]`` markup and plain URLs in ``text``."""
        # The pattern has one capturing group, so splitting yields alternating
        # tokens: 2nd, 4th, etc. contain a link, others the surrounding text.
        tokens = self._link.split(text)
        formatters = cycle((self._format_url, self._format_link))
        return ''.join(f(t) for f, t in zip(formatters, tokens))

    def _format_link(self, text):
        """Format the ``link|content`` text found between square brackets."""
        link, content = [t.strip() for t in text.split('|', 1)]
        if self._is_image(content):
            # Image used as the link content: wrap it in a link below.
            content = self._get_image(content, link)
        elif self._is_image(link):
            # The link target is an image: show it with content as the title.
            return self._get_image(link, content)
        return self._get_link(link, content)

    def _is_image(self, text):
        """Return True if ``text`` ends with one of the image extensions."""
        return text.lower().endswith(self._image_exts)
71
72
class LineFormatter(object):
    """Apply inline formatting (bold, italic, code and links) to a line.

    Bold is marked with asterisks, italic with underscores and code with
    double backticks. Markers only take effect when they are preceded by
    the beginning of the line or a space (plus optional opening quotes or
    parentheses) and followed by optional punctuation and the end of the
    line or a space.
    """

    newline = '\n'
    # The patterns are raw strings so that the regex escapes they contain are
    # not interpreted as (invalid) Python string escape sequences.
    _bold = re.compile(r'''
(                         # prefix (group 1)
  (^|\ )                  # begin of line or space
  ["'(]* _?               # optionally any char "'( and optional begin of italic
)                         #
\*                        # start of bold
([^\ ].*?)                # no space and then anything (group 3)
\*                        # end of bold
(?=                       # start of postfix (non-capturing group)
  _? ["').,!?:;]*         # optional end of italic and any char "').,!?:;
  ($|\ )                  # end of line or space
)
''', re.VERBOSE)
    _italic = re.compile(r'''
( (^|\ ) ["'(]* )          # begin of line or space and opt. any char "'(
_                          # start of italic
([^\ _].*?)                # no space or underline and then anything
_                          # end of italic
(?= ["').,!?:;]* ($|\ ) )  # opt. any char "').,!?:; and end of line or space
''', re.VERBOSE)
    _code = re.compile(r'''
( (^|\ ) ["'(]* )          # same as above with _ changed to ``
``
([^\ `].*?)
``
(?= ["').,!?:;]* ($|\ ) )
''', re.VERBOSE)

    def __init__(self):
        # (marker, formatter) pairs in the order they are applied. The empty
        # marker is contained in every string, so links are always formatted.
        self._formatters = [('*', self._format_bold),
                            ('_', self._format_italic),
                            ('``', self._format_code),
                            ('', LinkFormatter().format_link)]

    def handles(self, line):
        """Return True for every line."""
        return True

    def format(self, line):
        """Return ``line`` with all applicable inline formatting applied."""
        for marker, formatter in self._formatters:
            # Skip the regex substitution when the marker cannot match.
            if marker in line:
                line = formatter(line)
        return line

    def _format_bold(self, line):
        """Wrap bold-marked text in ``<b>``, keeping the prefix (group 1)."""
        return self._bold.sub('\\1<b>\\3</b>', line)

    def _format_italic(self, line):
        """Wrap italic-marked text in ``<i>``, keeping the prefix (group 1)."""
        return self._italic.sub('\\1<i>\\3</i>', line)

    def _format_code(self, line):
        """Wrap code-marked text in ``<code>``, keeping the prefix (group 1)."""
        return self._code.sub('\\1<code>\\3</code>', line)
124
125
class HtmlFormatter(object):
    """Format multi-line text to HTML by delegating to block formatters.

    Each non-empty line is given to the formatter that handles it.
    Consecutive lines accepted by the same formatter form one block, and
    an empty line ends the current block.
    """

    def __init__(self):
        self._results = []
        self._formatters = [TableFormatter(),
                            PreformattedFormatter(),
                            ListFormatter(),
                            HeaderFormatter(),
                            RulerFormatter()]
        # The paragraph formatter is added last and receives a copy of the
        # other formatters so that it can check whether they handle a line.
        self._formatters.append(ParagraphFormatter(self._formatters[:]))
        self._current = None

    def format(self, text):
        """Return ``text`` formatted as HTML blocks joined with newlines."""
        # Start from an empty result list so that reusing the same instance
        # does not repeat the output of earlier calls.
        self._results = []
        for line in text.splitlines():
            self._process_line(line)
        self._end_current()
        return '\n'.join(self._results)

    def _process_line(self, line):
        """Add ``line`` to the current block or start a new block with it."""
        if not line.strip():
            # An empty line terminates whatever block is in progress.
            self._end_current()
        elif self._current and self._current.handles(line):
            self._current.add(line)
        else:
            self._end_current()
            self._current = self._find_formatter(line)
            self._current.add(line)

    def _end_current(self):
        """Finish the block in progress, if any, and store its output."""
        if self._current:
            self._results.append(self._current.end())
            self._current = None

    def _find_formatter(self, line):
        """Return the first formatter that handles ``line``.

        Returns None implicitly if no formatter in ``self._formatters``
        accepts the line.
        """
        for formatter in self._formatters:
            if formatter.handles(line):
                return formatter
163
164
class _Formatter(object):
    """Base class for formatters that collect lines and render them as one.

    Subclasses implement `_handles` to tell whether they accept a line and
    `format` to render the collected lines. When `_strip_lines` is true,
    lines are stripped before they are checked or stored.
    """
    _strip_lines = True

    def __init__(self):
        self._lines = []

    def handles(self, line):
        """Tell whether this formatter accepts ``line``."""
        if self._strip_lines:
            line = line.strip()
        return self._handles(line)

    def _handles(self, line):
        raise NotImplementedError

    def add(self, line):
        """Store ``line`` to be rendered when the block ends."""
        if self._strip_lines:
            line = line.strip()
        self._lines.append(line)

    def end(self):
        """Render the stored lines, forget them and return the output."""
        formatted = self.format(self._lines)
        # Reset only after rendering so the next block starts empty.
        self._lines = []
        return formatted

    def format(self, lines):
        raise NotImplementedError
187
188
class _SingleLineFormatter(_Formatter):
    """Base class for formatters whose block is always exactly one line.

    Subclasses provide `match` to recognize the line and `format_line` to
    render it.
    """

    def _handles(self, line):
        # Once a line has been stored, no further lines are accepted.
        if self._lines:
            return False
        return self.match(line)

    def match(self, line):
        raise NotImplementedError

    def format(self, lines):
        # Only the first collected line is rendered.
        only_line = lines[0]
        return self.format_line(only_line)

    def format_line(self, line):
        raise NotImplementedError
202
203
class RulerFormatter(_SingleLineFormatter):
    """Render a line consisting of three or more hyphens as ``<hr>``."""
    _ruler = re.compile('^-{3,}$')
    match = _ruler.match

    def format_line(self, line):
        # The matched text is irrelevant; the output is always the same.
        return '<hr>'
209
210
class HeaderFormatter(_SingleLineFormatter):
    """Render ``= Title =`` style lines as HTML headers.

    One to three equal signs are accepted on both sides, and the same
    number must be used at the start and at the end.
    """
    _header = re.compile(r'^(={1,3})\s+(\S.*?)\s+\1$')
    match = _header.match

    def format_line(self, line):
        found = self.match(line)
        # One more than the number of equal signs: '=' is rendered as <h2>.
        depth = len(found.group(1)) + 1
        return '<h%d>%s</h%d>' % (depth, found.group(2), depth)
218
219
class ParagraphFormatter(_Formatter):
    """Render lines that no other formatter accepts as a ``<p>`` element."""
    _format_line = LineFormatter().format

    def __init__(self, other_formatters):
        super(ParagraphFormatter, self).__init__()
        self._other_formatters = other_formatters

    def _handles(self, line):
        # A line belongs to a paragraph only if every other formatter
        # rejects it.
        for other in self._other_formatters:
            if other.handles(line):
                return False
        return True

    def format(self, lines):
        # The collected lines are joined into one line before inline
        # formatting is applied.
        joined = ' '.join(lines)
        return '<p>%s</p>' % self._format_line(joined)
233
234
class TableFormatter(_Formatter):
    """Render consecutive ``| cell | cell |`` lines as an HTML table.

    Cells that both start and end with an equal sign are rendered as
    header cells.
    """

    # The patterns are raw strings so that the escaped pipe is not
    # interpreted as an (invalid) Python string escape sequence.
    # A table line starts with a pipe and a space and ends with a space and
    # a pipe; the line '| |' alone is accepted as well.
    _table_line = re.compile(r'^\| (.* |)\|$')
    # Cells are separated by a pipe that has a space on both sides.
    _line_splitter = re.compile(r' \|(?= )')
    _format_cell_content = LineFormatter().format

    def _handles(self, line):
        """Return True if ``line`` looks like a table row."""
        return self._table_line.match(line) is not None

    def format(self, lines):
        """Return the collected table lines as a ``<table>`` element."""
        return self._format_table([self._split_to_cells(l) for l in lines])

    def _split_to_cells(self, line):
        """Split a table line into a list of stripped cell contents."""
        # The slice drops the pipes at the beginning and end of the line.
        return [cell.strip() for cell in self._line_splitter.split(line[1:-1])]

    def _format_table(self, rows):
        """Build the table markup from ``rows``, a list of cell lists."""
        maxlen = max(len(row) for row in rows)
        table = ['<table border="1">']
        for row in rows:
            # Fix ragged tables by padding short rows with empty cells. A new
            # list is built so that the given rows are not modified.
            padded = row + [''] * (maxlen - len(row))
            table.append('<tr>')
            table.extend(self._format_cell(cell) for cell in padded)
            table.append('</tr>')
        table.append('</table>')
        return '\n'.join(table)

    def _format_cell(self, content):
        """Return ``content`` as a ``<th>`` or ``<td>`` element."""
        if content.startswith('=') and content.endswith('='):
            tx = 'th'
            # Drop the surrounding equal signs and the space around the text.
            content = content[1:-1].strip()
        else:
            tx = 'td'
        return '<%s>%s</%s>' % (tx, self._format_cell_content(content), tx)
267
268
class PreformattedFormatter(_Formatter):
    """Render consecutive lines starting with a pipe as a ``<pre>`` block."""
    _format_line = LineFormatter().format

    def _handles(self, line):
        # A lone pipe stands for an empty preformatted line.
        if line == '|':
            return True
        return line.startswith('| ')

    def format(self, lines):
        output = ['<pre>']
        for line in lines:
            # Drop the two-character '| ' marker before inline formatting.
            output.append(self._format_line(line[2:]))
        output.append('</pre>')
        return '\n'.join(output)
278
279
class ListFormatter(_Formatter):
    """Render lines starting with ``- `` as items of a ``<ul>`` list.

    Lines are stored unstripped because leading space is what marks a line
    as the continuation of the previous item.
    """
    _strip_lines = False
    _format_item = LineFormatter().format

    def _handles(self, line):
        # A new item is always accepted.
        if line.strip().startswith('- '):
            return True
        # An indented line continues an item only when a list has been
        # started; the stored lines themselves act as the truth value here.
        return line.startswith(' ') and self._lines

    def format(self, lines):
        markup = ['<ul>']
        for item in self._combine_lines(lines):
            markup.append('<li>%s</li>' % self._format_item(item))
        markup.append('</ul>')
        return '\n'.join(markup)

    def _combine_lines(self, lines):
        """Yield one string per list item, with continuation lines joined."""
        pending = []
        for raw in lines:
            text = raw.strip()
            if text.startswith('- '):
                # A new item begins; emit the previous one if there is any.
                if pending:
                    yield ' '.join(pending)
                pending = [text[2:].strip()]
            else:
                pending.append(text)
        yield ' '.join(pending)
304