1import html.entities
2from typing import Dict, List, Optional
3
4from . import config
5
# Unicode code point -> replacement text for every entity named in
# config.UNIFIABLE; the "nbsp" entry is deliberately left out of this table.
unifiable_n = {
    html.entities.name2codepoint[entity_name]: replacement
    for entity_name, replacement in config.UNIFIABLE.items()
    if entity_name != "nbsp"
}
11
12
def hn(tag: str) -> int:
    """
    Return the level of a heading tag.

    :type tag: str

    :returns: 1-9 for the tag names "h1".."h9"; 0 for anything else,
    including the empty string, "h0" and names longer than two characters
    :rtype: int
    """
    # Test the length first so an empty tag name cannot raise IndexError.
    if len(tag) == 2 and tag[0] == "h":
        level = tag[1]
        # Single-character string comparison: only "1".."9" pass.
        if "0" < level <= "9":
            return int(level)
    return 0
19
20
def dumb_property_dict(style: str) -> Dict[str, str]:
    """
    Parse a ";"-separated list of css declarations.

    Each declaration is split on its first ":" only; pieces without a ":"
    are ignored. Names and values are stripped and lower-cased, and a
    repeated name keeps the value of its last occurrence.

    :type style: str

    :returns: A hash of css attributes
    :rtype: dict
    """
    properties = {}  # type: Dict[str, str]
    for declaration in style.split(";"):
        if ":" not in declaration:
            continue
        name, _, value = declaration.partition(":")
        properties[name.strip().lower()] = value.strip().lower()
    return properties
29
30
def dumb_css_parser(data: str) -> Dict[str, Dict[str, str]]:
    """
    Parse a stylesheet into per-selector property hashes.

    This is a best-effort parser: "@import" statements are removed, the
    remaining text is cut at every "}" and each piece is expected to look
    like "selector { declarations". Pieces that do not contain exactly one
    "{" (plain text, or nested blocks such as "@media") are skipped
    individually, so one unusual rule no longer discards every other rule.
    When a selector occurs more than once, the last occurrence wins.

    :type data: str

    :returns: A hash of css selectors, each of which contains a hash of
    css attributes.
    :rtype: dict
    """
    # remove @import sentences; the appended ";" guarantees that every
    # "@import" has a terminator to cut at
    data += ";"
    import_index = data.find("@import")
    while import_index != -1:
        data = data[0:import_index] + data[data.find(";", import_index) + 1 :]
        import_index = data.find("@import")

    # parse the css rule by rule
    elements = {}  # type: Dict[str, Dict[str, str]]
    for rule in data.split("}"):
        parts = rule.split("{")
        if len(parts) != 2:
            # not a simple "selector { declarations" rule; ignore it
            continue
        selector, declarations = parts
        elements[selector.strip()] = dumb_property_dict(declarations)

    return elements
55
56
def element_style(
    attrs: Dict[str, Optional[str]],
    style_def: Dict[str, Dict[str, str]],
    parent_style: Dict[str, str],
) -> Dict[str, str]:
    """
    Compute the style of an element from its parent, classes and inline css.

    Starting from a copy of the parent's style, the properties of every
    class listed in the "class" attribute are applied in order (looked up
    in style_def under "." + class name), then the inline "style"
    attribute is applied on top. parent_style itself is not modified.

    An attribute whose value is None is treated like a missing attribute
    instead of failing an assertion.

    :type attrs: dict
    :type style_def: dict
    :type parent_style: dict

    :returns: A hash of the 'final' style attributes of the element
    :rtype: dict
    """
    style = parent_style.copy()

    class_attr = attrs.get("class")
    if class_attr is not None:
        for css_class in class_attr.split():
            # unknown classes contribute nothing
            style.update(style_def.get("." + css_class, {}))

    inline_style = attrs.get("style")
    if inline_style is not None:
        style.update(dumb_property_dict(inline_style))

    return style
82
83
def google_list_style(style: Dict[str, str]) -> str:
    """
    Classify a list as ordered or unordered from its css.

    :type style: dict

    :returns: "ul" when 'list-style-type' is one of disc, circle, square
    or none; "ol" otherwise, including when the property is missing
    :rtype: str
    """
    unordered_types = ("disc", "circle", "square", "none")
    if style.get("list-style-type") in unordered_types:
        return "ul"
    return "ol"
98
99
def google_has_height(style: Dict[str, str]) -> bool:
    """
    Tell whether the element's style explicitly carries a 'height'
    attribute; only the presence of the key matters, not its value.

    :type style: dict

    :rtype: bool
    """
    height_defined = "height" in style
    return height_defined
110
111
def google_text_emphasis(style: Dict[str, str]) -> List[str]:
    """
    Collect the values of the emphasis-related css properties that are
    present, always in the order text-decoration, font-style, font-weight.

    :type style: dict

    :returns: A list of all emphasis modifiers of the element
    :rtype: list
    """
    emphasis_properties = ("text-decoration", "font-style", "font-weight")
    return [style[name] for name in emphasis_properties if name in style]
128
129
def google_fixed_width_font(style: Dict[str, str]) -> bool:
    """
    Tell whether the element's css selects a fixed width font, i.e. its
    'font-family' is exactly "courier new" or "consolas".

    :type style: dict

    :rtype: bool
    """
    fixed_width_families = ("courier new", "consolas")
    return style.get("font-family", "") in fixed_width_families
142
143
def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
    """
    Extract numbering from list element attributes

    :type attrs: dict

    :returns: the integer value of the "start" attribute minus one, or 0
    when the attribute is missing, has no value (None) or is not an integer
    :rtype: int
    """
    start = attrs.get("start")
    # A None value is handled like a missing attribute rather than asserted
    # on, so it can never reach int() and raise.
    if start is None:
        return 0
    try:
        return int(start) - 1
    except ValueError:
        return 0
160
161
def skipwrap(para: str, wrap_links: bool, wrap_list_items: bool) -> bool:
    """
    Decide whether a paragraph must be left unwrapped.

    :param para: the paragraph text
    :param wrap_links: when False, a paragraph matching config.RE_LINK
        is never wrapped
    :param wrap_list_items: when False, a paragraph starting with "-" or
        "*" (but not "**") is never wrapped

    :returns: True when the paragraph should not be wrapped; an empty
    paragraph returns False
    :rtype: bool
    """
    # An empty paragraph has nothing to protect; answering here also keeps
    # the checks below from indexing into an empty string.
    if not para:
        return False

    # If it appears to contain a link
    # don't wrap
    if not wrap_links and config.RE_LINK.search(para):
        return True
    # If the text begins with four spaces or one tab, it's a code block;
    # don't wrap
    if para.startswith(("    ", "\t")):
        return True

    # If the text begins with only two "--", possibly preceded by
    # whitespace, that's an emdash; so wrap.
    stripped = para.lstrip()
    if stripped[0:2] == "--" and len(stripped) > 2 and stripped[2] != "-":
        return False

    # I'm not sure what this is for; I thought it was to detect lists,
    # but there's a <br>-inside-<span> case in one of the tests that
    # also depends upon it.
    if stripped[0:1] in ("-", "*") and stripped[0:2] != "**":
        return not wrap_list_items

    # If the text begins with a single -, *, or +, followed by a space,
    # or an integer, followed by a ., followed by a space (in either
    # case optionally preceded by whitespace), it's a list; don't wrap.
    return bool(
        config.RE_ORDERED_LIST_MATCHER.match(stripped)
        or config.RE_UNORDERED_LIST_MATCHER.match(stripped)
    )
191
192
def escape_md(text: str) -> str:
    """
    Escapes markdown-sensitive characters within other markdown
    constructs: every match of config.RE_MD_CHARS_MATCHER is replaced by
    a backslash followed by the pattern's first capture group.
    """
    backslash_prefixed = r"\\\1"
    escaped = config.RE_MD_CHARS_MATCHER.sub(backslash_prefixed, text)
    return escaped
199
200
def escape_md_section(text: str, snob: bool = False) -> str:
    """
    Escapes markdown-sensitive characters across whole document sections.

    The substitutions run in a fixed order: backslashes first, then (only
    when snob is true) the full character set, then the dot, plus and dash
    patterns, which keep their first capture group and put a backslash in
    front of the second.
    """
    escape_group_one = r"\\\1"
    escape_group_two = r"\1\\\2"

    text = config.RE_MD_BACKSLASH_MATCHER.sub(escape_group_one, text)
    if snob:
        text = config.RE_MD_CHARS_MATCHER_ALL.sub(escape_group_one, text)

    line_start_matchers = (
        config.RE_MD_DOT_MATCHER,
        config.RE_MD_PLUS_MATCHER,
        config.RE_MD_DASH_MATCHER,
    )
    for matcher in line_start_matchers:
        text = matcher.sub(escape_group_two, text)

    return text
215
216
def reformat_table(lines: List[str], right_margin: int) -> List[str]:
    """
    Given the lines of a table
    pads the cells and returns the new lines

    Cells are delimited by "|". Every column is padded to the length of
    its longest cell (trailing whitespace removed) plus right_margin. A
    line made up only of "-" and "|" characters is padded with "-",
    every other line with spaces. Rows keep their own number of cells;
    a row with more cells than the others simply adds columns.

    :type lines: list
    :type right_margin: int

    :returns: the padded lines, or an empty list for an empty table
    :rtype: list
    """
    # An empty table has no rows to size the columns from.
    if not lines:
        return []

    rows = [[cell.rstrip() for cell in line.split("|")] for line in lines]

    # find the maximum width of the columns; rows may have unequal
    # lengths (e.g. because of colspan), so the list grows as needed
    max_width = []  # type: List[int]
    for cols in rows:
        for index, cell in enumerate(cols):
            width = len(cell) + right_margin
            if index == len(max_width):
                max_width.append(width)
            elif width > max_width[index]:
                max_width[index] = width

    # reformat
    new_lines = []
    for line, cols in zip(lines, rows):
        filler = "-" if set(line.strip()) == set("-|") else " "
        new_cols = [
            cell + filler * (width - len(cell)) for cell, width in zip(cols, max_width)
        ]
        new_lines.append("|".join(new_cols))
    return new_lines
258
259
def pad_tables_in_text(text: str, right_margin: int = 1) -> str:
    """
    Provide padding for tables in the text

    Lines containing config.TABLE_MARKER_FOR_PAD toggle table mode and are
    removed from the output. Lines collected between two markers are
    reformatted with reformat_table and followed by one empty line; all
    other lines are passed through unchanged.

    Two markers with nothing between them produce only the empty line, and
    a table whose closing marker is missing is still reformatted and
    emitted at the end instead of being dropped.

    :type text: str
    :type right_margin: int

    :returns: the text with its tables padded
    :rtype: str
    """
    table_buffer = []  # type: List[str]
    table_started = False
    new_lines = []  # type: List[str]
    for line in text.split("\n"):
        # Toggle table started
        if config.TABLE_MARKER_FOR_PAD in line:
            table_started = not table_started
            if not table_started:
                # closing marker: flush the collected rows, if any
                if table_buffer:
                    new_lines.extend(reformat_table(table_buffer, right_margin))
                table_buffer = []
                new_lines.append("")
            continue
        # Process lines
        if table_started:
            table_buffer.append(line)
        else:
            new_lines.append(line)

    # The buffer is only non-empty here when the last table was never
    # closed; keep its rows rather than losing them.
    if table_buffer:
        new_lines.extend(reformat_table(table_buffer, right_margin))

    return "\n".join(new_lines)
284