1# -*- coding: utf-8 -*- # 2# Copyright 2017 Google LLC. All Rights Reserved. 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15 16"""Cloud SDK markdown document token renderer. 17 18This is different from the other renderers: 19 20(1) The output is a list of (token, text) tuples returned by 21 TokenRenderer.Finish(). 22(2) A token is an empty object that conveys font style and embellishment by 23 convention using the token name. Callers set up a style sheet indexed by 24 tokens to control how the embellishments are rendered, e.g. color. 25(3) The rendering is constrained by width and height. 26 27Tokens generated by this module: 28 29 Token.Markdown.Bold: bold text 30 Token.Markdown.BoldItalic: bold+italic text 31 Token.Markdown.Code: code text for command line examples 32 Token.Markdown.Definition: definition list item (flag or subcommand or choice) 33 Token.Markdown.Italic: italic text 34 Token.Markdown.Normal: normal text 35 Token.Markdown.Section: section header 36 Token.Markdown.Truncated: the last token => indicates truncation 37 Token.Markdown.Value: definition list item value (flag value) 38 39The Token objects self-define on first usage. Don't champion this pattern in the 40Cloud SDK. 41 42Usage: 43 44 from six.moves import StringIO 45 46 from googlecloudsdk.core.document_renderers import token_renderer 47 from googlecloudsdk.core.document_renderers import render_document 48 49 markdown = <markdown document string> 50 tokens = render_document.MarkdownRenderer( 51 token_renderer.TokenRenderer(width=W, height=H), 52 StringIO(markdown)).Run() 53""" 54 55from __future__ import absolute_import 56from __future__ import division 57from __future__ import unicode_literals 58 59import re 60 61from googlecloudsdk.core.console import console_attr 62from googlecloudsdk.core.document_renderers import renderer 63 64from prompt_toolkit.token import Token 65 66 67class TokenRenderer(renderer.Renderer): 68 """Renders markdown to a list of lines where each line is a list of Tokens. 69 70 Attributes: 71 _attr: console_attr.ConsoleAttr object. 72 _bullet: List of bullet characters indexed by list level modulo #bullets. 73 _compact: Compact representation if True. Saves rendering real estate. 74 _csi: The control sequence indicator character. Token does not 75 have control sequences. This renderer uses them internally to manage 76 font styles and attributes (bold, code, italic). 77 _current_token_type: current default Token.Markdown.* type 78 _fill: The number of characters in the current output line. 79 _height: The height of the output window, 0 to disable height checks. 80 _ignore_paragraph: Ignore paragraph markdown until the next non-space 81 _AddToken. 82 _ignore_width: True if the next output word should ignore _width. 83 _indent: List of left indentations in characters indexed by _level. 84 _level: The section or list level counting from 0. 85 _tokens: The list of output tokens 86 _truncated: The number of output lines exceeded the output height. 87 _rows: current rows in table 88 """ 89 # Internal inline embellishments are 2 character sequences 90 # <CSI><EMBELLISHMENT>. The embellishment must be an alpha character 91 # to make the display width helpers work properly. 92 CSI = '\0' # Won't clash with markdown text input. 93 EMBELLISHMENTS = { 94 'B': Token.Markdown.Bold, 95 'C': Token.Markdown.Code, 96 'I': Token.Markdown.Italic, 97 'N': Token.Markdown.Normal, 98 'Z': Token.Markdown.BoldItalic, 99 } 100 INDENT = 4 101 SPLIT_INDENT = 2 102 TOKEN_TYPE_INDEX = 0 103 TOKEN_TEXT_INDEX = 1 104 105 class Indent(object): 106 """Second line indent stack.""" 107 108 def __init__(self, compact=True): 109 self.indent = 0 if compact else TokenRenderer.INDENT 110 self.second_line_indent = self.indent 111 112 def __init__(self, height=0, encoding='utf-8', compact=True, **kwargs): 113 super(TokenRenderer, self).__init__(**kwargs) 114 self._attr = console_attr.GetConsoleAttr(encoding=encoding) 115 self._csi = self.CSI 116 self._attr._csi = self._csi # pylint: disable=protected-access 117 self._bullet = self._attr.GetBullets() 118 self._compact = compact 119 self._fill = 0 120 self._current_token_type = Token.Markdown.Normal 121 self._height = height 122 self._ignore_paragraph = False 123 self._ignore_width = False 124 self._indent = [self.Indent(compact)] 125 self._level = 0 126 self._lines = [] 127 self._tokens = [] 128 self._truncated = False 129 self._rows = [] 130 131 def _Truncate(self, tokens, overflow): 132 """Injects a truncation indicator token and rejects subsequent tokens. 133 134 Args: 135 tokens: The last line of tokens at the output height. The part of the 136 line within the output width will be visible, modulo the trailing 137 truncation marker token added here. 138 overflow: If not None then this is a (word, available) tuple from Fill() 139 where word caused the line width overflow and available is the number of 140 characters available in the current line before ' '+word would be 141 appended. 142 143 Returns: 144 A possibly altered list of tokens that form the last output line. 145 """ 146 self._truncated = True 147 marker_string = '...' 148 marker_width = len(marker_string) 149 marker_token = (Token.Markdown.Truncated, marker_string) 150 if tokens and overflow: 151 word, available = overflow # pylint: disable=unpacking-non-sequence 152 if marker_width == available: 153 # Exactly enough space for the marker. 154 pass 155 elif (marker_width + 1) <= available: 156 # The marker can replace the trailing characters in the overflow word. 157 word = ' ' + self._UnFormat(word)[:available-marker_width-1] 158 tokens.append((self._current_token_type, word)) 159 else: 160 # Truncate the token list so the marker token can fit. 161 truncated_tokens = [] 162 available = self._width 163 for token in tokens: 164 word = token[self.TOKEN_TEXT_INDEX] 165 width = self._attr.DisplayWidth(word) 166 available -= width 167 if available <= marker_width: 168 trim = marker_width - available 169 if trim: 170 word = word[:-trim] 171 truncated_tokens.append((token[self.TOKEN_TYPE_INDEX], word)) 172 break 173 truncated_tokens.append(token) 174 tokens = truncated_tokens 175 tokens.append(marker_token) 176 return tokens 177 178 def _NewLine(self, overflow=None): 179 """Adds the current token list to the line list. 180 181 Args: 182 overflow: If not None then this is a (word, available) tuple from Fill() 183 where word caused the line width overflow and available is the number of 184 characters available in the current line before ' '+word would be 185 appended. 186 """ 187 tokens = self._tokens 188 self._tokens = [] 189 if self._truncated or not tokens and self._compact: 190 return 191 if self._lines: 192 # Delete trailing space. 193 while (self._lines[-1] and 194 self._lines[-1][-1][self.TOKEN_TEXT_INDEX].isspace()): 195 self._lines[-1] = self._lines[-1][:-1] 196 if self._height and (len(self._lines) + int(bool(tokens))) >= self._height: 197 tokens = self._Truncate(tokens, overflow) 198 self._lines.append(tokens) 199 200 def _MergeOrAddToken(self, text, token_type): 201 """Merges text if the previous token_type matches or appends a new token.""" 202 if not text: 203 return 204 if (not self._tokens or 205 self._tokens[-1][self.TOKEN_TYPE_INDEX] != token_type): 206 self._tokens.append((token_type, text)) 207 elif self._tokens[-1][self.TOKEN_TYPE_INDEX] == Token.Markdown.Section: 208 # A section header with no content. 209 prv_text = self._tokens[-1][self.TOKEN_TEXT_INDEX] 210 prv_indent = re.match('( *)', prv_text).group(1) 211 new_indent = re.match('( *)', text).group(1) 212 if prv_indent == new_indent: 213 # Same indentation => discard the previous empty section. 214 self._tokens[-1] = (token_type, text) 215 else: 216 # Insert newline to separate previous header from the new one. 217 self._NewLine() 218 self._tokens.append((token_type, text)) 219 else: 220 self._tokens[-1] = (token_type, 221 self._tokens[-1][self.TOKEN_TEXT_INDEX] + text) 222 223 def _AddToken(self, text, token_type=None): 224 """Appends a (token_type, text) tuple to the current line.""" 225 if text and not text.isspace(): 226 self._ignore_paragraph = False 227 if not token_type: 228 token_type = self._current_token_type 229 if self._csi not in text: 230 self._MergeOrAddToken(text, token_type) 231 else: 232 i = 0 233 while True: 234 j = text.find(self._csi, i) 235 if j < 0: 236 self._MergeOrAddToken(text[i:], token_type) 237 break 238 self._MergeOrAddToken(text[i:j], token_type) 239 token_type = self.EMBELLISHMENTS[text[j + 1]] 240 self._current_token_type = token_type 241 i = j + 2 242 243 def _UnFormat(self, text): 244 """Returns text with all inline formatting stripped.""" 245 if self._csi not in text: 246 return text 247 stripped = [] 248 i = 0 249 while i < len(text): 250 j = text.find(self._csi, i) 251 if j < 0: 252 stripped.append(text[i:]) 253 break 254 stripped.append(text[i:j]) 255 i = j + 2 256 return ''.join(stripped) 257 258 def _AddDefinition(self, text): 259 """Appends a definition list definition item to the current line.""" 260 text = self._UnFormat(text) 261 parts = text.split('=', 1) 262 self._AddToken(parts[0], Token.Markdown.Definition) 263 if len(parts) > 1: 264 self._AddToken('=', Token.Markdown.Normal) 265 self._AddToken(parts[1], Token.Markdown.Value) 266 self._NewLine() 267 268 def _Flush(self): 269 """Flushes the current collection of Fill() lines.""" 270 self._ignore_width = False 271 if self._fill: 272 self._NewLine() 273 self.Content() 274 self._fill = 0 275 276 def _SetIndent(self, level, indent=0, second_line_indent=None): 277 """Sets the markdown list level and indentations. 278 279 Args: 280 level: int, The desired markdown list level. 281 indent: int, The new indentation. 282 second_line_indent: int, The second line indentation. This is subtracted 283 from the prevailing indent to decrease the indentation of the next input 284 line for this effect: 285 SECOND LINE INDENT ON THE NEXT LINE 286 PREVAILING INDENT 287 ON SUBSEQUENT LINES 288 """ 289 if self._level < level: 290 # The level can increase by 1 or more. Loop through each so that 291 # intervening levels are handled the same. 292 while self._level < level: 293 prev_level = self._level 294 self._level += 1 295 if self._level >= len(self._indent): 296 self._indent.append(self.Indent()) 297 self._indent[self._level].indent = ( 298 self._indent[prev_level].indent + indent) 299 if (self._level > 1 and 300 self._indent[prev_level].second_line_indent == 301 self._indent[prev_level].indent): 302 # Bump the indent by 1 char for nested indentation. Top level looks 303 # fine (aesthetically) without it. 304 self._indent[self._level].indent += 1 305 self._indent[self._level].second_line_indent = ( 306 self._indent[self._level].indent) 307 if second_line_indent is not None: 308 # Adjust the second line indent if specified. 309 self._indent[self._level].second_line_indent -= second_line_indent 310 else: 311 # Decreasing level just sets the indent stack level, no state to clean up. 312 self._level = level 313 if second_line_indent is not None: 314 # Change second line indent on existing level. 315 self._indent[self._level].indent = ( 316 self._indent[self._level].second_line_indent + second_line_indent) 317 318 def Example(self, line): 319 """Displays line as an indented example. 320 321 Args: 322 line: The example line text. 323 """ 324 self._fill = self._indent[self._level].indent + self.INDENT 325 self._AddToken(' ' * self._fill + line, Token.Markdown.Normal) 326 self._NewLine() 327 self.Content() 328 self._fill = 0 329 330 def Fill(self, line): 331 """Adds a line to the output, splitting to stay within the output width. 332 333 This is close to textwrap.wrap() except that control sequence characters 334 don't count in the width computation. 335 336 Args: 337 line: The text line. 338 """ 339 self.Blank() 340 for word in line.split(): 341 if not self._fill: 342 if self._level or not self._compact: 343 self._fill = self._indent[self._level].indent - 1 344 else: 345 self._level = 0 346 self._AddToken(' ' * self._fill) 347 width = self._attr.DisplayWidth(word) 348 available = self._width - self._fill 349 if (width + 1) >= available and not self._ignore_width: 350 self._NewLine(overflow=(word, available)) 351 self._fill = self._indent[self._level].indent 352 self._AddToken(' ' * self._fill) 353 else: 354 self._ignore_width = False 355 if self._fill: 356 self._fill += 1 357 self._AddToken(' ') 358 self._fill += width 359 self._AddToken(word) 360 361 def Finish(self): 362 """Finishes all output document rendering.""" 363 self._Flush() 364 self.Font() 365 return self._lines 366 367 def Font(self, attr=None): 368 """Returns the font embellishment control sequence for attr. 369 370 Args: 371 attr: None to reset to the default font, otherwise one of renderer.BOLD, 372 renderer.ITALIC, or renderer.CODE. 373 374 Returns: 375 The font embellishment control sequence. 376 """ 377 if attr is None: 378 self._font = 0 379 else: 380 mask = 1 << attr 381 self._font ^= mask 382 font = self._font & ((1 << renderer.BOLD) | 383 (1 << renderer.CODE) | 384 (1 << renderer.ITALIC)) 385 if font & (1 << renderer.CODE): 386 embellishment = 'C' 387 elif font == ((1 << renderer.BOLD) | (1 << renderer.ITALIC)): 388 embellishment = 'Z' 389 elif font == (1 << renderer.BOLD): 390 embellishment = 'B' 391 elif font == (1 << renderer.ITALIC): 392 embellishment = 'I' 393 else: 394 embellishment = 'N' 395 return self._csi + embellishment 396 397 def Heading(self, level, heading): 398 """Renders a heading. 399 400 Args: 401 level: The heading level counting from 1. 402 heading: The heading text. 403 """ 404 if level == 1 and heading.endswith('(1)'): 405 # Ignore man page TH. 406 return 407 self._Flush() 408 self.Line() 409 self.Font() 410 if level > 2: 411 indent = ' ' * (level - 2) 412 self._AddToken(indent) 413 if self._compact: 414 self._ignore_paragraph = True 415 self._fill += len(indent) 416 self._AddToken(heading, Token.Markdown.Section) 417 if self._compact: 418 self._ignore_paragraph = True 419 self._fill += self._attr.DisplayWidth(heading) 420 else: 421 self._NewLine() 422 self.Blank() 423 self._level = 0 424 self._rows = [] 425 426 def Line(self): 427 """Renders a paragraph separating line.""" 428 if self._ignore_paragraph: 429 return 430 self._Flush() 431 if not self.HaveBlank(): 432 self.Blank() 433 self._NewLine() 434 435 def List(self, level, definition=None, end=False): 436 """Renders a bullet or definition list item. 437 438 Args: 439 level: The list nesting level, 0 if not currently in a list. 440 definition: Bullet list if None, definition list item otherwise. 441 end: End of list if True. 442 """ 443 self._Flush() 444 if not level: 445 self._level = level 446 elif end: 447 # End of list. 448 self._SetIndent(level) 449 elif definition is not None: 450 # Definition list item. 451 if definition: 452 self._SetIndent(level, indent=4, second_line_indent=3) 453 self._AddToken(' ' * self._indent[level].second_line_indent) 454 self._AddDefinition(definition) 455 else: 456 self._SetIndent(level, indent=1, second_line_indent=0) 457 self.Line() 458 else: 459 # Bullet list item. 460 indent = 2 if level > 1 else 4 461 self._SetIndent(level, indent=indent, second_line_indent=2) 462 self._AddToken(' ' * self._indent[level].second_line_indent + 463 self._bullet[(level - 1) % len(self._bullet)]) 464 self._fill = self._indent[level].indent + 1 465 self._ignore_width = True 466 467 def _SkipSpace(self, line, index): 468 """Skip space characters starting at line[index]. 469 470 Args: 471 line: The string. 472 index: The starting index in string. 473 474 Returns: 475 The index in line after spaces or len(line) at end of string. 476 """ 477 while index < len(line): 478 c = line[index] 479 if c != ' ': 480 break 481 index += 1 482 return index 483 484 def _SkipControlSequence(self, line, index): 485 """Skip the control sequence at line[index]. 486 487 Args: 488 line: The string. 489 index: The starting index in string. 490 491 Returns: 492 The index in line after the control sequence or len(line) at end of 493 string. 494 """ 495 n = self._attr.GetControlSequenceLen(line[index:]) 496 if not n: 497 n = 1 498 return index + n 499 500 def _SkipNest(self, line, index, open_chars='[(', close_chars=')]'): 501 """Skip a [...] nested bracket group starting at line[index]. 502 503 Args: 504 line: The string. 505 index: The starting index in string. 506 open_chars: The open nesting characters. 507 close_chars: The close nesting characters. 508 509 Returns: 510 The index in line after the nesting group or len(line) at end of string. 511 """ 512 nest = 0 513 while index < len(line): 514 c = line[index] 515 index += 1 516 if c in open_chars: 517 nest += 1 518 elif c in close_chars: 519 nest -= 1 520 if nest <= 0: 521 break 522 elif c == self._csi: 523 index = self._SkipControlSequence(line, index) 524 return index 525 526 def _SplitWideSynopsisGroup(self, group, indent, running_width): 527 """Splits a wide SYNOPSIS section group string._out. 528 529 Args: 530 group: The wide group string to split. 531 indent: The prevailing left indent. 532 running_width: The width of the line in progress. 533 534 Returns: 535 The running_width after the group has been split and written. 536 """ 537 prev_delimiter = ' ' 538 while group: 539 # Check split delimiters in order for visual emphasis. 540 for delimiter in (' | ', ' : ', ' ', ','): 541 part, _, remainder = group.partition(delimiter) 542 w = self._attr.DisplayWidth(part) 543 if ((running_width + len(prev_delimiter) + w) >= self._width or 544 prev_delimiter != ',' and delimiter == ','): 545 if delimiter != ',' and (indent + 546 self.SPLIT_INDENT + 547 len(prev_delimiter) + 548 w) >= self._width: 549 # The next delimiter may produce a smaller first part. 550 continue 551 if prev_delimiter == ',': 552 self._AddToken(prev_delimiter) 553 prev_delimiter = ' ' 554 if running_width != indent: 555 running_width = indent + self.SPLIT_INDENT 556 self._NewLine() 557 self._AddToken(' ' * running_width) 558 self._AddToken(prev_delimiter + part) 559 running_width += len(prev_delimiter) + w 560 prev_delimiter = delimiter 561 group = remainder 562 break 563 return running_width 564 565 def Synopsis(self, line, is_synopsis=False): 566 """Renders NAME and SYNOPSIS lines as a second line indent. 567 568 Collapses adjacent spaces to one space, deletes trailing space, and doesn't 569 split top-level nested [...] or (...) groups. Also detects and does not 570 count terminal control sequences. 571 572 Args: 573 line: The NAME or SYNOPSIS text. 574 is_synopsis: if it is the synopsis section 575 """ 576 # Split the line into token, token | token, and [...] groups. 577 groups = [] 578 i = self._SkipSpace(line, 0) 579 beg = i 580 while i < len(line): 581 c = line[i] 582 if c == ' ': 583 end = i 584 i = self._SkipSpace(line, i) 585 if i <= (len(line) - 1) and line[i] == '|' and line[i + 1] == ' ': 586 i = self._SkipSpace(line, i + 1) 587 else: 588 groups.append(line[beg:end]) 589 beg = i 590 elif c in '[(': 591 i = self._SkipNest(line, i) 592 elif c == self._csi: 593 i = self._SkipControlSequence(line, i) 594 else: 595 i += 1 596 if beg < len(line): 597 groups.append(line[beg:]) 598 599 # Output the groups. 600 indent = self._indent[0].indent - 1 601 running_width = indent 602 self._AddToken(' ' * running_width) 603 indent += self.INDENT 604 for group in groups: 605 w = self._attr.DisplayWidth(group) + 1 606 if (running_width + w) >= self._width: 607 running_width = indent 608 self._NewLine() 609 self._AddToken(' ' * running_width) 610 if (running_width + w) >= self._width: 611 # The group is wider than the available width and must be split. 612 running_width = self._SplitWideSynopsisGroup( 613 group, indent, running_width) 614 continue 615 self._AddToken(' ' + group) 616 running_width += w 617 self._NewLine() 618 self._NewLine() 619 620 def TableLine(self, line, indent=0): 621 """Adds an indented table line to the output. 622 623 Args: 624 line: The line to add. A newline will be added. 625 indent: The number of characters to indent the table. 626 """ 627 self._AddToken(indent * ' ' + line) 628 self._NewLine() 629