from typing import List, Optional, Tuple, TYPE_CHECKING

from ..token import Token
from ..ruler import StateBase
from ..common.utils import isSpace

if TYPE_CHECKING:
    from markdown_it.main import MarkdownIt


class StateBlock(StateBase):
    """Parsing state for block-level rules.

    Holds the source text, the output token list and per-line caches
    (start/end offsets and indentation) computed once in ``__init__``.
    """

    def __init__(
        self,
        src: str,
        md: "MarkdownIt",
        env,
        tokens: List[Token],
        srcCharCode: Optional[Tuple[int, ...]] = None,
    ):
        """Initialise the state and build the per-line caches.

        :param src: source text to parse
        :param md: link to the parser instance
        :param env: environment object, stored as-is
        :param tokens: list that ``push`` appends new tokens to
        :param srcCharCode: optional pre-computed character codes of ``src``;
            when given it is stored directly together with ``_src``
        """
        if srcCharCode is None:
            # The marker loop below reads ``self.srcCharCode``, so assigning
            # ``src`` is expected to populate it (presumably through a
            # property setter on StateBase -- confirm there).
            self.src = src
        else:
            self._src = src
            self.srcCharCode = srcCharCode

        # link to parser instance
        self.md = md

        self.env = env

        #
        # Internal state variables
        #

        self.tokens = tokens

        # line begin offsets for fast jumps
        self.bMarks: List[int] = []
        # line end offsets for fast jumps
        self.eMarks: List[int] = []
        # offsets of the first non-space characters (tabs not expanded)
        self.tShift: List[int] = []
        # indents for each line (tabs expanded)
        self.sCount: List[int] = []

        # An amount of virtual spaces (tabs expanded) between beginning
        # of each line (bMarks) and real beginning of that line.
        #
        # It exists only as a hack because blockquotes override bMarks
        # losing information in the process.
        #
        # It's used only when expanding tabs, you can think about it as
        # an initial tab length, e.g. bsCount=21 applied to string `\t123`
        # means first tab should be expanded to 4-21%4 === 3 spaces.
        #
        self.bsCount: List[int] = []

        # block parser variables

        # required block content indent (for example, if we are
        # inside a list, it would be positioned after list marker)
        self.blkIndent = 0
        # line index in src
        self.line = 0
        # lines count
        self.lineMax = 0
        # loose/tight mode for lists
        self.tight = False
        # indent of the current dd block (-1 if there isn't any)
        self.ddIndent = -1
        # indent of the current list block (-1 if there isn't any)
        self.listIndent = -1

        # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference'
        # used in lists to determine if they interrupt a paragraph
        self.parentType = "root"

        self.level = 0

        # renderer
        self.result = ""

        # Create caches
        # Generate markers.
        src_length = len(self.src)
        final_index = src_length - 1

        line_start = 0
        indent_chars = 0  # leading whitespace characters on the current line
        indent_width = 0  # the same indent measured in columns (tabs expanded)
        in_indent = True

        for index, code in enumerate(self.srcCharCode):
            if in_indent:
                if isSpace(code):
                    indent_chars += 1
                    # a tab advances to the next multiple of 4 columns
                    indent_width += (4 - indent_width % 4) if code == 0x09 else 1
                    continue
                in_indent = False

            is_line_feed = code == 0x0A
            if not (is_line_feed or index == final_index):
                continue

            # A line ends on its line feed; a last line without one ends just
            # past the final character.
            line_end = index if is_line_feed else index + 1

            self.bMarks.append(line_start)
            self.eMarks.append(line_end)
            self.tShift.append(indent_chars)
            self.sCount.append(indent_width)
            self.bsCount.append(0)

            in_indent = True
            indent_chars = 0
            indent_width = 0
            line_start = line_end + 1

        # Push fake entry to simplify cache bounds checks
        self.bMarks.append(src_length)
        self.eMarks.append(src_length)
        self.tShift.append(0)
        self.sCount.append(0)
        self.bsCount.append(0)

        # don't count last fake line
        self.lineMax = len(self.bMarks) - 1

    def __repr__(self):
        """Return a short summary: class name, line, level and token count."""
        name = self.__class__.__name__
        details = f"(line={self.line},level={self.level},tokens={len(self.tokens)})"
        return f"{name}" + details

    def push(self, ttype: str, tag: str, nesting: int) -> Token:
        """Create a block token, append it to ``self.tokens`` and return it.

        A closing token (``nesting < 0``) lowers ``self.level`` before its
        level is recorded; an opening token (``nesting > 0``) raises it
        afterwards.
        """
        token = Token(ttype, tag, nesting)
        token.block = True

        is_closing = nesting < 0
        is_opening = nesting > 0

        if is_closing:
            self.level -= 1
        token.level = self.level
        if is_opening:
            self.level += 1

        self.tokens.append(token)
        return token

    def isEmpty(self, line: int) -> bool:
        """Return True when ``line`` has nothing after its leading whitespace."""
        first_non_space = self.bMarks[line] + self.tShift[line]
        return first_non_space >= self.eMarks[line]

    def skipEmptyLines(self, from_pos: int) -> int:
        """Return the first line index >= ``from_pos`` that is not empty.

        Returns ``self.lineMax`` if every remaining line is empty.
        """
        while from_pos < self.lineMax:
            try:
                has_content = (
                    self.bMarks[from_pos] + self.tShift[from_pos]
                ) < self.eMarks[from_pos]
            except IndexError:
                # Out-of-range index: step past it and stop scanning.
                return from_pos + 1
            if has_content:
                return from_pos
            from_pos += 1
        return from_pos

    def skipSpaces(self, pos: int) -> int:
        """Return the first offset >= ``pos`` that does not hold a space."""
        limit = len(self.src)
        codes = self.srcCharCode
        while pos < limit and isSpace(codes[pos]):
            pos += 1
        return pos

    def skipSpacesBack(self, pos: int, minimum: int) -> int:
        """Walk back from ``pos`` over spaces, never going below ``minimum``.

        Returns the offset just after the last non-space character found
        before ``pos``, or ``minimum`` when only spaces lie in between.
        ``pos`` is returned unchanged if it is already <= ``minimum``.
        """
        codes = self.srcCharCode
        while pos > minimum and isSpace(codes[pos - 1]):
            pos -= 1
        return pos

    def skipChars(self, pos: int, code: int) -> int:
        """Return the first offset >= ``pos`` whose char code is not ``code``."""
        limit = len(self.src)
        codes = self.srcCharCode
        while pos < limit and codes[pos] == code:
            pos += 1
        return pos

    def skipCharsBack(self, pos: int, code: int, minimum: int) -> int:
        """Walk back from ``pos - 1`` over ``code``, stopping at ``minimum``.

        Returns the offset just after the last character before ``pos`` that
        differs from ``code``, or ``minimum`` when none does. ``pos`` is
        returned unchanged if it is already <= ``minimum``.
        """
        codes = self.srcCharCode
        while pos > minimum and codes[pos - 1] == code:
            pos -= 1
        return pos

    def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str:
        """Cut the line range ``[begin, end)`` out of the source.

        :param begin: index of the first line to include
        :param end: index one past the last line to include
        :param indent: number of indent columns (tabs expanded) to strip from
            the start of each line
        :param keepLastLF: when true, the last line also keeps the character
            that follows its end mark (its line feed, if there is one)
        :return: the joined text, or ``""`` when ``begin >= end``
        """
        if begin >= end:
            return ""

        pieces = []
        last_line = end - 1

        for line in range(begin, end):
            line_start = cursor = self.bMarks[line]

            # Every line but the last keeps the character after its end mark;
            # the last one keeps it only on request.
            stop = self.eMarks[line]
            if keepLastLF or line < last_line:
                stop += 1

            # Consume up to `indent` columns of leading indentation.
            width = 0
            while cursor < stop and width < indent:
                code = self.srcCharCode[cursor]
                if isSpace(code):
                    if code == 0x09:
                        width += 4 - (width + self.bsCount[line]) % 4
                    else:
                        width += 1
                elif cursor - line_start < self.tShift[line]:
                    # Non-space character that tShift still counts as indent;
                    # treated as one column (presumably a marker consumed by
                    # a container rule -- confirm against callers).
                    width += 1
                else:
                    break
                cursor += 1

            text = self.src[cursor:stop]
            if width > indent:
                # partially expanding tabs in code blocks, e.g '\t\tfoobar'
                # with indent=2 becomes '  \tfoobar'
                text = (" " * (width - indent)) + text
            pieces.append(text)

        return "".join(pieces)