# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
import sys


"""
Python 3 Stuff
=============================================================================
"""
PY3 = (sys.version_info[0] == 3)

# Alias the text-related builtins whose names differ between Python 2 and 3
# so the rest of the library can use a single spelling.
if not PY3:  # pragma: no cover
    string_type = basestring  # noqa
    text_type = unicode  # noqa
    int2str = unichr  # noqa
else:  # pragma: no cover
    string_type = text_type = str
    int2str = chr


"""
Constants you might want to modify
-----------------------------------------------------------------------------
"""


# Matches (case-insensitively) a complete tag name that is treated as a
# block level element. The alternatives are joined into one anchored group.
BLOCK_LEVEL_ELEMENTS = re.compile(
    "^(%s)$" % "|".join((
        r"p", r"div", r"h[1-6]", r"blockquote", r"pre", r"table",
        r"dl", r"ol", r"ul",
        r"script", r"noscript", r"form", r"fieldset", r"iframe", r"math",
        r"hr", r"hr/", r"style", r"li", r"dt", r"dd", r"thead", r"tbody",
        r"tr", r"th", r"td", r"section", r"footer", r"header", r"group",
        r"figure",
        r"figcaption", r"aside", r"article", r"canvas", r"output",
        r"progress", r"video", r"nav", r"main",
    )),
    re.IGNORECASE
)
# Placeholders
STX = '\u0002'  # "Start of text" control character: opens a placeholder
ETX = '\u0003'  # "End of text" control character: closes a placeholder
INLINE_PLACEHOLDER_PREFIX = STX + "klzzwxh:"
INLINE_PLACEHOLDER = "".join((INLINE_PLACEHOLDER_PREFIX, "%s", ETX))
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
AMP_SUBSTITUTE = "".join((STX, "amp", ETX))
HTML_PLACEHOLDER = "".join((STX, "wzxhzdk:%s", ETX))
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
TAG_PLACEHOLDER = "".join((STX, "hzzhzkh:%s", ETX))


"""
Constants you probably do not need to change
-----------------------------------------------------------------------------
"""

# Inclusive (first, last) code point pairs of right-to-left scripts.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u0590', '\u07FF'),
    # Tifinagh
    ('\u2D30', '\u2D7F'),
)

# Extensions should use "markdown.util.etree" instead of "etree" (or do `from
# markdown.util import etree`). Do not import it by yourself.

def _version_tuple(version):
    """Return the numeric components of a version string as a tuple of ints.

    Comparing tuples of integers avoids the pitfalls of comparing version
    strings lexicographically, where e.g. "1.0.10" sorts before "1.0.5".
    """
    return tuple(int(part) for part in re.findall(r'[0-9]+', version))


# Bind `etree` to the ElementTree implementation used by this module,
# preferring the C implementation when it can be imported.
try:  # pragma: no cover
    # Is the C implementation of ElementTree available?
    import xml.etree.cElementTree as etree
    from xml.etree.ElementTree import Comment
    # Serializers (including ours) test with non-c Comment
    etree.test_comment = Comment
    if _version_tuple(etree.VERSION) < (1, 0, 5):
        raise RuntimeError("cElementTree version 1.0.5 or higher is required.")
except (ImportError, RuntimeError):  # pragma: no cover
    # Use the Python implementation of ElementTree?
    import xml.etree.ElementTree as etree
    if _version_tuple(etree.VERSION) < (1, 1):
        raise RuntimeError("ElementTree version 1.1 or higher is required")


"""
AUXILIARY GLOBAL FUNCTIONS
=============================================================================
"""


def isBlockLevel(tag):
    """Check if the tag is a block level HTML tag.

    Returns the (truthy) regex match object when `tag` is a string that
    matches `BLOCK_LEVEL_ELEMENTS`, `None` when it is a string that does
    not match, and `False` when `tag` is not a string at all.
    """
    if isinstance(tag, string_type):
        return BLOCK_LEVEL_ELEMENTS.match(tag)
    # Some ElementTree tags are not strings, so return False.
    return False


def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
    """Parse a string representing a bool value.

    Keyword arguments:

    * value: the value to interpret. Non-string values are converted with
      `bool()`; strings are compared case-insensitively against the known
      spellings of true ('true', 'yes', 'y', 'on', '1') and false
      ('false', 'no', 'n', 'off', '0', 'none').
    * fail_on_errors: when true, an unrecognised string raises ValueError;
      otherwise None is returned for it.
    * preserve_none: when true, `None` and the string 'none' (any case)
      yield None instead of False.

    Returns : True, False, or None (see above).

    """
    if not isinstance(value, string_type):
        if preserve_none and value is None:
            return value
        return bool(value)

    # Lowercase once instead of once per comparison.
    normalized = value.lower()
    if preserve_none and normalized == 'none':
        return None
    if normalized in ('true', 'yes', 'y', 'on', '1'):
        return True
    if normalized in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    if fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
    return None


"""
MISC AUXILIARY CLASSES
=============================================================================
"""


class AtomicString(text_type):
    """A string which should not be further processed."""
    pass


class Processor(object):
    """Base class that optionally keeps a reference to a Markdown instance."""

    def __init__(self, markdown_instance=None):
        """
        Store `markdown_instance` as `self.markdown`.

        The attribute is only set when a truthy instance is supplied.
        """
        if markdown_instance:
            self.markdown = markdown_instance


class HtmlStash(object):
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.
    """

    def __init__(self):
        """ Create a HtmlStash. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks = []
        self.tag_counter = 0
        self.tag_data = []  # list of dictionaries in the order tags appear

    def store(self, html, safe=False):
        """
        Saves an HTML segment for later reinsertion. Returns a
        placeholder string that needs to be inserted into the
        document.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : a placeholder string

        """
        self.rawHtmlBlocks.append((html, safe))
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self):
        """
        Discard everything stashed so far and restart both counters.

        The tag data is cleared together with the raw HTML blocks so that
        stored tags do not accumulate across resets.
        """
        self.html_counter = 0
        self.rawHtmlBlocks = []
        self.tag_counter = 0
        self.tag_data = []

    def get_placeholder(self, key):
        """Return the HTML placeholder string for the given stash index."""
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag, attrs, left_index, right_index):
        """Store tag data and return a placeholder."""
        self.tag_data.append({'tag': tag, 'attrs': attrs,
                              'left_index': left_index,
                              'right_index': right_index})
        placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1  # equal to the tag's index in self.tag_data
        return placeholder