1""" 2Internal module for formatting output data in csv, html, 3and latex files. This module also applies to display formatting. 4""" 5 6from contextlib import contextmanager 7from csv import QUOTE_NONE, QUOTE_NONNUMERIC 8import decimal 9from functools import partial 10from io import StringIO 11import math 12import re 13from shutil import get_terminal_size 14from typing import ( 15 IO, 16 TYPE_CHECKING, 17 Any, 18 Callable, 19 Dict, 20 Iterable, 21 List, 22 Mapping, 23 Optional, 24 Sequence, 25 Tuple, 26 Type, 27 Union, 28 cast, 29) 30from unicodedata import east_asian_width 31 32import numpy as np 33 34from pandas._config.config import get_option, set_option 35 36from pandas._libs import lib 37from pandas._libs.missing import NA 38from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT 39from pandas._libs.tslibs.nattype import NaTType 40from pandas._typing import ( 41 ArrayLike, 42 CompressionOptions, 43 FilePathOrBuffer, 44 FloatFormatType, 45 IndexLabel, 46 Label, 47 StorageOptions, 48) 49 50from pandas.core.dtypes.common import ( 51 is_categorical_dtype, 52 is_complex_dtype, 53 is_datetime64_dtype, 54 is_datetime64tz_dtype, 55 is_extension_array_dtype, 56 is_float, 57 is_float_dtype, 58 is_integer, 59 is_integer_dtype, 60 is_list_like, 61 is_numeric_dtype, 62 is_scalar, 63 is_timedelta64_dtype, 64) 65from pandas.core.dtypes.missing import isna, notna 66 67from pandas.core.arrays.datetimes import DatetimeArray 68from pandas.core.arrays.timedeltas import TimedeltaArray 69from pandas.core.base import PandasObject 70import pandas.core.common as com 71from pandas.core.construction import extract_array 72from pandas.core.indexes.api import Index, MultiIndex, PeriodIndex, ensure_index 73from pandas.core.indexes.datetimes import DatetimeIndex 74from pandas.core.indexes.timedeltas import TimedeltaIndex 75from pandas.core.reshape.concat import concat 76 77from pandas.io.common import stringify_path 78from pandas.io.formats.printing import adjoin, justify, 
pprint_thing 79 80if TYPE_CHECKING: 81 from pandas import Categorical, DataFrame, Series 82 83 84FormattersType = Union[ 85 List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable] 86] 87ColspaceType = Mapping[Label, Union[str, int]] 88ColspaceArgType = Union[ 89 str, int, Sequence[Union[str, int]], Mapping[Label, Union[str, int]] 90] 91 92common_docstring = """ 93 Parameters 94 ---------- 95 buf : str, Path or StringIO-like, optional, default None 96 Buffer to write to. If None, the output is returned as a string. 97 columns : sequence, optional, default None 98 The subset of columns to write. Writes all columns by default. 99 col_space : %(col_space_type)s, optional 100 %(col_space)s. 101 header : %(header_type)s, optional 102 %(header)s. 103 index : bool, optional, default True 104 Whether to print index (row) labels. 105 na_rep : str, optional, default 'NaN' 106 String representation of ``NaN`` to use. 107 formatters : list, tuple or dict of one-param. functions, optional 108 Formatter functions to apply to columns' elements by position or 109 name. 110 The result of each function must be a unicode string. 111 List/tuple must be of length equal to the number of columns. 112 float_format : one-parameter function, optional, default None 113 Formatter function to apply to columns' elements if they are 114 floats. This function must return a unicode string and will be 115 applied only to the non-``NaN`` elements, with ``NaN`` being 116 handled by ``na_rep``. 117 118 .. versionchanged:: 1.2.0 119 120 sparsify : bool, optional, default True 121 Set to False for a DataFrame with a hierarchical index to print 122 every multiindex key at each row. 123 index_names : bool, optional, default True 124 Prints the names of the indexes. 125 justify : str, default None 126 How to justify the column labels. If None uses the option from 127 the print configuration (controlled by set_option), 'right' out 128 of the box. 
Valid values are 129 130 * left 131 * right 132 * center 133 * justify 134 * justify-all 135 * start 136 * end 137 * inherit 138 * match-parent 139 * initial 140 * unset. 141 max_rows : int, optional 142 Maximum number of rows to display in the console. 143 min_rows : int, optional 144 The number of rows to display in the console in a truncated repr 145 (when number of rows is above `max_rows`). 146 max_cols : int, optional 147 Maximum number of columns to display in the console. 148 show_dimensions : bool, default False 149 Display DataFrame dimensions (number of rows by number of columns). 150 decimal : str, default '.' 151 Character recognized as decimal separator, e.g. ',' in Europe. 152 """ 153 154_VALID_JUSTIFY_PARAMETERS = ( 155 "left", 156 "right", 157 "center", 158 "justify", 159 "justify-all", 160 "start", 161 "end", 162 "inherit", 163 "match-parent", 164 "initial", 165 "unset", 166) 167 168return_docstring = """ 169 Returns 170 ------- 171 str or None 172 If buf is None, returns the result as a string. Otherwise returns 173 None. 
class CategoricalFormatter:
    """
    Render a Categorical as ``[v1, v2, ...]`` followed by an optional footer.

    Parameters
    ----------
    categorical : Categorical
        The values to render.
    buf : IO[str], optional
        Stored on the instance as ``self.buf``; a new ``StringIO`` is created
        when omitted.
    length : bool, default True
        Whether the footer starts with ``Length: <n>``.
    na_rep : str, default "NaN"
        Passed to ``format_array`` as the missing-value representation.
    footer : bool, default True
        Whether ``to_string`` appends the footer.
    """

    def __init__(
        self,
        categorical: "Categorical",
        buf: Optional[IO[str]] = None,
        length: bool = True,
        na_rep: str = "NaN",
        footer: bool = True,
    ):
        self.categorical = categorical
        self.buf = buf if buf is not None else StringIO("")
        self.na_rep = na_rep
        self.length = length
        self.footer = footer
        self.quoting = QUOTE_NONNUMERIC

    def _get_footer(self) -> str:
        """
        Build the footer text.

        Returns
        -------
        str
            ``Length: <n>`` (only when ``self.length`` is truthy) followed, on
            a new line, by the text of
            ``self.categorical._repr_categories_info()``.
        """
        # The length entry is always the first piece of the footer, so no
        # separator is ever needed in front of it.
        footer = f"Length: {len(self.categorical)}" if self.length else ""

        level_info = self.categorical._repr_categories_info()

        # Levels are added in a newline
        if footer:
            footer += "\n"
        footer += level_info

        return footer

    def _get_formatted_values(self) -> List[str]:
        """Format the underlying values with ``format_array``."""
        return format_array(
            self.categorical._internal_get_values(),
            None,
            float_format=None,
            na_rep=self.na_rep,
            quoting=self.quoting,
        )

    def to_string(self) -> str:
        """
        Return the full text representation.

        Returns
        -------
        str
            For an empty categorical: the footer alone, or ``""`` when the
            footer is disabled. Otherwise the bracketed, comma-separated
            values, followed by the footer on a new line when it is enabled
            and non-empty.
        """
        if len(self.categorical) == 0:
            # Nothing to format; only the footer (if requested) is shown.
            return self._get_footer() if self.footer else ""

        stripped = [value.strip() for value in self._get_formatted_values()]
        lines = ["[" + ", ".join(stripped) + "]"]

        if self.footer:
            footer = self._get_footer()
            if footer:
                lines.append(footer)

        return "\n".join(lines)
self.header = header 261 self.length = length 262 self.index = index 263 self.max_rows = max_rows 264 self.min_rows = min_rows 265 266 if float_format is None: 267 float_format = get_option("display.float_format") 268 self.float_format = float_format 269 self.dtype = dtype 270 self.adj = get_adjustment() 271 272 self._chk_truncate() 273 274 def _chk_truncate(self) -> None: 275 self.tr_row_num: Optional[int] 276 277 min_rows = self.min_rows 278 max_rows = self.max_rows 279 # truncation determined by max_rows, actual truncated number of rows 280 # used below by min_rows 281 is_truncated_vertically = max_rows and (len(self.series) > max_rows) 282 series = self.series 283 if is_truncated_vertically: 284 max_rows = cast(int, max_rows) 285 if min_rows: 286 # if min_rows is set (not None or 0), set max_rows to minimum 287 # of both 288 max_rows = min(min_rows, max_rows) 289 if max_rows == 1: 290 row_num = max_rows 291 series = series.iloc[:max_rows] 292 else: 293 row_num = max_rows // 2 294 series = concat((series.iloc[:row_num], series.iloc[-row_num:])) 295 self.tr_row_num = row_num 296 else: 297 self.tr_row_num = None 298 self.tr_series = series 299 self.is_truncated_vertically = is_truncated_vertically 300 301 def _get_footer(self) -> str: 302 name = self.series.name 303 footer = "" 304 305 if getattr(self.series.index, "freq", None) is not None: 306 assert isinstance( 307 self.series.index, (DatetimeIndex, PeriodIndex, TimedeltaIndex) 308 ) 309 footer += f"Freq: {self.series.index.freqstr}" 310 311 if self.name is not False and name is not None: 312 if footer: 313 footer += ", " 314 315 series_name = pprint_thing(name, escape_chars=("\t", "\r", "\n")) 316 footer += f"Name: {series_name}" 317 318 if self.length is True or ( 319 self.length == "truncate" and self.is_truncated_vertically 320 ): 321 if footer: 322 footer += ", " 323 footer += f"Length: {len(self.series)}" 324 325 if self.dtype is not False and self.dtype is not None: 326 dtype_name = 
getattr(self.tr_series.dtype, "name", None) 327 if dtype_name: 328 if footer: 329 footer += ", " 330 footer += f"dtype: {pprint_thing(dtype_name)}" 331 332 # level infos are added to the end and in a new line, like it is done 333 # for Categoricals 334 if is_categorical_dtype(self.tr_series.dtype): 335 level_info = self.tr_series._values._repr_categories_info() 336 if footer: 337 footer += "\n" 338 footer += level_info 339 340 return str(footer) 341 342 def _get_formatted_index(self) -> Tuple[List[str], bool]: 343 index = self.tr_series.index 344 345 if isinstance(index, MultiIndex): 346 have_header = any(name for name in index.names) 347 fmt_index = index.format(names=True) 348 else: 349 have_header = index.name is not None 350 fmt_index = index.format(name=True) 351 return fmt_index, have_header 352 353 def _get_formatted_values(self) -> List[str]: 354 return format_array( 355 self.tr_series._values, 356 None, 357 float_format=self.float_format, 358 na_rep=self.na_rep, 359 leading_space=self.index, 360 ) 361 362 def to_string(self) -> str: 363 series = self.tr_series 364 footer = self._get_footer() 365 366 if len(series) == 0: 367 return f"{type(self.series).__name__}([], {footer})" 368 369 fmt_index, have_header = self._get_formatted_index() 370 fmt_values = self._get_formatted_values() 371 372 if self.is_truncated_vertically: 373 n_header_rows = 0 374 row_num = self.tr_row_num 375 row_num = cast(int, row_num) 376 width = self.adj.len(fmt_values[row_num - 1]) 377 if width > 3: 378 dot_str = "..." 379 else: 380 dot_str = ".." 
class TextAdjustment:
    """
    Width and padding helpers that count every character as one column.
    """

    def __init__(self):
        self.encoding = get_option("display.encoding")

    def len(self, text: str) -> int:
        """Return the display width of ``text``, i.e. the builtin ``len``."""
        return len(text)

    def justify(self, texts: Any, max_len: int, mode: str = "right") -> List[str]:
        """Delegate to the module-level ``justify`` with the given ``mode``."""
        return justify(texts, max_len, mode=mode)

    def adjoin(self, space: int, *lists, **kwargs) -> str:
        """
        Delegate to the module-level ``adjoin``, supplying this object's
        ``len`` and ``justify`` as the width and padding callbacks.
        """
        return adjoin(
            space, *lists, strlen=self.len, justfunc=self.justify, **kwargs
        )


class EastAsianTextAdjustment(TextAdjustment):
    """
    Variant of ``TextAdjustment`` that measures strings by their Unicode
    East Asian Width category.
    """

    def __init__(self):
        super().__init__()
        # Width used for any category missing from the map below; it is
        # driven by the ``display.unicode.ambiguous_as_wide`` option.
        wide = get_option("display.unicode.ambiguous_as_wide")
        self.ambiguous_width = 2 if wide else 1

        # Definition of East Asian Width
        # https://unicode.org/reports/tr11/
        # Ambiguous width can be changed by option
        self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}

    def len(self, text: str) -> int:
        """
        Return the display width of ``text`` using East Asian Width rules.

        Non-string input falls back to the builtin ``len``.
        """
        if isinstance(text, str):
            widths = self._EAW_MAP
            fallback = self.ambiguous_width
            return sum(widths.get(east_asian_width(ch), fallback) for ch in text)
        return len(text)

    def justify(
        self, texts: Iterable[str], max_len: int, mode: str = "right"
    ) -> List[str]:
        """
        Pad every string so that its display width reaches ``max_len``.

        ``mode`` of ``"left"`` or ``"center"`` selects that alignment; any
        other value right-aligns.
        """
        if mode == "left":
            pad_method = "ljust"
        elif mode == "center":
            pad_method = "center"
        else:
            pad_method = "rjust"

        padded = []
        for item in texts:
            # The str padding methods count characters, so the target is
            # shifted by the gap between character count and display width.
            target = max_len - self.len(item) + len(item)
            padded.append(getattr(item, pad_method)(target))
        return padded


def get_adjustment() -> TextAdjustment:
    """
    Return the adjustment helper selected by the
    ``display.unicode.east_asian_width`` option.
    """
    adjustment_cls = (
        EastAsianTextAdjustment
        if get_option("display.unicode.east_asian_width")
        else TextAdjustment
    )
    return adjustment_cls()
self._calc_max_cols_fitted() 509 self.max_rows_fitted = self._calc_max_rows_fitted() 510 511 self.tr_frame = self.frame 512 self.truncate() 513 self.adj = get_adjustment() 514 515 def get_strcols(self) -> List[List[str]]: 516 """ 517 Render a DataFrame to a list of columns (as lists of strings). 518 """ 519 strcols = self._get_strcols_without_index() 520 521 if self.index: 522 str_index = self._get_formatted_index(self.tr_frame) 523 strcols.insert(0, str_index) 524 525 return strcols 526 527 @property 528 def should_show_dimensions(self) -> bool: 529 return self.show_dimensions is True or ( 530 self.show_dimensions == "truncate" and self.is_truncated 531 ) 532 533 @property 534 def is_truncated(self) -> bool: 535 return bool(self.is_truncated_horizontally or self.is_truncated_vertically) 536 537 @property 538 def is_truncated_horizontally(self) -> bool: 539 return bool(self.max_cols_fitted and (len(self.columns) > self.max_cols_fitted)) 540 541 @property 542 def is_truncated_vertically(self) -> bool: 543 return bool(self.max_rows_fitted and (len(self.frame) > self.max_rows_fitted)) 544 545 @property 546 def dimensions_info(self) -> str: 547 return f"\n\n[{len(self.frame)} rows x {len(self.frame.columns)} columns]" 548 549 @property 550 def has_index_names(self) -> bool: 551 return _has_names(self.frame.index) 552 553 @property 554 def has_column_names(self) -> bool: 555 return _has_names(self.frame.columns) 556 557 @property 558 def show_row_idx_names(self) -> bool: 559 return all((self.has_index_names, self.index, self.show_index_names)) 560 561 @property 562 def show_col_idx_names(self) -> bool: 563 return all((self.has_column_names, self.show_index_names, self.header)) 564 565 @property 566 def max_rows_displayed(self) -> int: 567 return min(self.max_rows or len(self.frame), len(self.frame)) 568 569 def _initialize_sparsify(self, sparsify: Optional[bool]) -> bool: 570 if sparsify is None: 571 return get_option("display.multi_sparse") 572 return sparsify 573 574 
def _initialize_formatters( 575 self, formatters: Optional[FormattersType] 576 ) -> FormattersType: 577 if formatters is None: 578 return {} 579 elif len(self.frame.columns) == len(formatters) or isinstance(formatters, dict): 580 return formatters 581 else: 582 raise ValueError( 583 f"Formatters length({len(formatters)}) should match " 584 f"DataFrame number of columns({len(self.frame.columns)})" 585 ) 586 587 def _initialize_justify(self, justify: Optional[str]) -> str: 588 if justify is None: 589 return get_option("display.colheader_justify") 590 else: 591 return justify 592 593 def _initialize_columns(self, columns: Optional[Sequence[str]]) -> Index: 594 if columns is not None: 595 cols = ensure_index(columns) 596 self.frame = self.frame[cols] 597 return cols 598 else: 599 return self.frame.columns 600 601 def _initialize_colspace( 602 self, col_space: Optional[ColspaceArgType] 603 ) -> ColspaceType: 604 result: ColspaceType 605 606 if col_space is None: 607 result = {} 608 elif isinstance(col_space, (int, str)): 609 result = {"": col_space} 610 result.update({column: col_space for column in self.frame.columns}) 611 elif isinstance(col_space, Mapping): 612 for column in col_space.keys(): 613 if column not in self.frame.columns and column != "": 614 raise ValueError( 615 f"Col_space is defined for an unknown column: {column}" 616 ) 617 result = col_space 618 else: 619 if len(self.frame.columns) != len(col_space): 620 raise ValueError( 621 f"Col_space length({len(col_space)}) should match " 622 f"DataFrame number of columns({len(self.frame.columns)})" 623 ) 624 result = dict(zip(self.frame.columns, col_space)) 625 return result 626 627 def _calc_max_cols_fitted(self) -> Optional[int]: 628 """Number of columns fitting the screen.""" 629 if not self._is_in_terminal(): 630 return self.max_cols 631 632 width, _ = get_terminal_size() 633 if self._is_screen_narrow(width): 634 return width 635 else: 636 return self.max_cols 637 638 def _calc_max_rows_fitted(self) -> 
Optional[int]: 639 """Number of rows with data fitting the screen.""" 640 max_rows: Optional[int] 641 642 if self._is_in_terminal(): 643 _, height = get_terminal_size() 644 if self.max_rows == 0: 645 # rows available to fill with actual data 646 return height - self._get_number_of_auxillary_rows() 647 648 if self._is_screen_short(height): 649 max_rows = height 650 else: 651 max_rows = self.max_rows 652 else: 653 max_rows = self.max_rows 654 655 return self._adjust_max_rows(max_rows) 656 657 def _adjust_max_rows(self, max_rows: Optional[int]) -> Optional[int]: 658 """Adjust max_rows using display logic. 659 660 See description here: 661 https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options 662 663 GH #37359 664 """ 665 if max_rows: 666 if (len(self.frame) > max_rows) and self.min_rows: 667 # if truncated, set max_rows showed to min_rows 668 max_rows = min(self.min_rows, max_rows) 669 return max_rows 670 671 def _is_in_terminal(self) -> bool: 672 """Check if the output is to be shown in terminal.""" 673 return bool(self.max_cols == 0 or self.max_rows == 0) 674 675 def _is_screen_narrow(self, max_width) -> bool: 676 return bool(self.max_cols == 0 and len(self.frame.columns) > max_width) 677 678 def _is_screen_short(self, max_height) -> bool: 679 return bool(self.max_rows == 0 and len(self.frame) > max_height) 680 681 def _get_number_of_auxillary_rows(self) -> int: 682 """Get number of rows occupied by prompt, dots and dimension info.""" 683 dot_row = 1 684 prompt_row = 1 685 num_rows = dot_row + prompt_row 686 687 if self.show_dimensions: 688 num_rows += len(self.dimensions_info.splitlines()) 689 690 if self.header: 691 num_rows += 1 692 693 return num_rows 694 695 def truncate(self) -> None: 696 """ 697 Check whether the frame should be truncated. If so, slice the frame up. 
698 """ 699 if self.is_truncated_horizontally: 700 self._truncate_horizontally() 701 702 if self.is_truncated_vertically: 703 self._truncate_vertically() 704 705 def _truncate_horizontally(self) -> None: 706 """Remove columns, which are not to be displayed and adjust formatters. 707 708 Attributes affected: 709 - tr_frame 710 - formatters 711 - tr_col_num 712 """ 713 assert self.max_cols_fitted is not None 714 col_num = self.max_cols_fitted // 2 715 if col_num >= 1: 716 left = self.tr_frame.iloc[:, :col_num] 717 right = self.tr_frame.iloc[:, -col_num:] 718 self.tr_frame = concat((left, right), axis=1) 719 720 # truncate formatter 721 if isinstance(self.formatters, (list, tuple)): 722 self.formatters = [ 723 *self.formatters[:col_num], 724 *self.formatters[-col_num:], 725 ] 726 else: 727 col_num = cast(int, self.max_cols) 728 self.tr_frame = self.tr_frame.iloc[:, :col_num] 729 self.tr_col_num = col_num 730 731 def _truncate_vertically(self) -> None: 732 """Remove rows, which are not to be displayed. 
733 734 Attributes affected: 735 - tr_frame 736 - tr_row_num 737 """ 738 assert self.max_rows_fitted is not None 739 row_num = self.max_rows_fitted // 2 740 if row_num >= 1: 741 head = self.tr_frame.iloc[:row_num, :] 742 tail = self.tr_frame.iloc[-row_num:, :] 743 self.tr_frame = concat((head, tail)) 744 else: 745 row_num = cast(int, self.max_rows) 746 self.tr_frame = self.tr_frame.iloc[:row_num, :] 747 self.tr_row_num = row_num 748 749 def _get_strcols_without_index(self) -> List[List[str]]: 750 strcols: List[List[str]] = [] 751 752 if not is_list_like(self.header) and not self.header: 753 for i, c in enumerate(self.tr_frame): 754 fmt_values = self.format_col(i) 755 fmt_values = _make_fixed_width( 756 strings=fmt_values, 757 justify=self.justify, 758 minimum=int(self.col_space.get(c, 0)), 759 adj=self.adj, 760 ) 761 strcols.append(fmt_values) 762 return strcols 763 764 if is_list_like(self.header): 765 # cast here since can't be bool if is_list_like 766 self.header = cast(List[str], self.header) 767 if len(self.header) != len(self.columns): 768 raise ValueError( 769 f"Writing {len(self.columns)} cols " 770 f"but got {len(self.header)} aliases" 771 ) 772 str_columns = [[label] for label in self.header] 773 else: 774 str_columns = self._get_formatted_column_labels(self.tr_frame) 775 776 if self.show_row_idx_names: 777 for x in str_columns: 778 x.append("") 779 780 for i, c in enumerate(self.tr_frame): 781 cheader = str_columns[i] 782 header_colwidth = max( 783 int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader) 784 ) 785 fmt_values = self.format_col(i) 786 fmt_values = _make_fixed_width( 787 fmt_values, self.justify, minimum=header_colwidth, adj=self.adj 788 ) 789 790 max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth) 791 cheader = self.adj.justify(cheader, max_len, mode=self.justify) 792 strcols.append(cheader + fmt_values) 793 794 return strcols 795 796 def format_col(self, i: int) -> List[str]: 797 frame = self.tr_frame 798 
formatter = self._get_formatter(i) 799 return format_array( 800 frame.iloc[:, i]._values, 801 formatter, 802 float_format=self.float_format, 803 na_rep=self.na_rep, 804 space=self.col_space.get(frame.columns[i]), 805 decimal=self.decimal, 806 leading_space=self.index, 807 ) 808 809 def _get_formatter(self, i: Union[str, int]) -> Optional[Callable]: 810 if isinstance(self.formatters, (list, tuple)): 811 if is_integer(i): 812 i = cast(int, i) 813 return self.formatters[i] 814 else: 815 return None 816 else: 817 if is_integer(i) and i not in self.columns: 818 i = self.columns[i] 819 return self.formatters.get(i, None) 820 821 def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]: 822 from pandas.core.indexes.multi import sparsify_labels 823 824 columns = frame.columns 825 826 if isinstance(columns, MultiIndex): 827 fmt_columns = columns.format(sparsify=False, adjoin=False) 828 fmt_columns = list(zip(*fmt_columns)) 829 dtypes = self.frame.dtypes._values 830 831 # if we have a Float level, they don't use leading space at all 832 restrict_formatting = any(level.is_floating for level in columns.levels) 833 need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) 834 835 def space_format(x, y): 836 if ( 837 y not in self.formatters 838 and need_leadsp[x] 839 and not restrict_formatting 840 ): 841 return " " + y 842 return y 843 844 str_columns = list( 845 zip(*[[space_format(x, y) for y in x] for x in fmt_columns]) 846 ) 847 if self.sparsify and len(str_columns): 848 str_columns = sparsify_labels(str_columns) 849 850 str_columns = [list(x) for x in zip(*str_columns)] 851 else: 852 fmt_columns = columns.format() 853 dtypes = self.frame.dtypes 854 need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) 855 str_columns = [ 856 [" " + x if not self._get_formatter(i) and need_leadsp[x] else x] 857 for i, (col, x) in enumerate(zip(columns, fmt_columns)) 858 ] 859 # self.str_columns = str_columns 860 return str_columns 861 862 def 
_get_formatted_index(self, frame: "DataFrame") -> List[str]: 863 # Note: this is only used by to_string() and to_latex(), not by 864 # to_html(). so safe to cast col_space here. 865 col_space = {k: cast(int, v) for k, v in self.col_space.items()} 866 index = frame.index 867 columns = frame.columns 868 fmt = self._get_formatter("__index__") 869 870 if isinstance(index, MultiIndex): 871 fmt_index = index.format( 872 sparsify=self.sparsify, 873 adjoin=False, 874 names=self.show_row_idx_names, 875 formatter=fmt, 876 ) 877 else: 878 fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)] 879 880 fmt_index = [ 881 tuple( 882 _make_fixed_width( 883 list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj 884 ) 885 ) 886 for x in fmt_index 887 ] 888 889 adjoined = self.adj.adjoin(1, *fmt_index).split("\n") 890 891 # empty space for columns 892 if self.show_col_idx_names: 893 col_header = [str(x) for x in self._get_column_name_list()] 894 else: 895 col_header = [""] * columns.nlevels 896 897 if self.header: 898 return col_header + adjoined 899 else: 900 return adjoined 901 902 def _get_column_name_list(self) -> List[str]: 903 names: List[str] = [] 904 columns = self.frame.columns 905 if isinstance(columns, MultiIndex): 906 names.extend("" if name is None else name for name in columns.names) 907 else: 908 names.append("" if columns.name is None else columns.name) 909 return names 910 911 912class DataFrameRenderer: 913 """Class for creating dataframe output in multiple formats. 914 915 Called in pandas.core.generic.NDFrame: 916 - to_csv 917 - to_latex 918 919 Called in pandas.core.frame.DataFrame: 920 - to_html 921 - to_string 922 923 Parameters 924 ---------- 925 fmt : DataFrameFormatter 926 Formatter with the formating options. 
927 """ 928 929 def __init__(self, fmt: DataFrameFormatter): 930 self.fmt = fmt 931 932 def to_latex( 933 self, 934 buf: Optional[FilePathOrBuffer[str]] = None, 935 column_format: Optional[str] = None, 936 longtable: bool = False, 937 encoding: Optional[str] = None, 938 multicolumn: bool = False, 939 multicolumn_format: Optional[str] = None, 940 multirow: bool = False, 941 caption: Optional[str] = None, 942 label: Optional[str] = None, 943 position: Optional[str] = None, 944 ) -> Optional[str]: 945 """ 946 Render a DataFrame to a LaTeX tabular/longtable environment output. 947 """ 948 from pandas.io.formats.latex import LatexFormatter 949 950 latex_formatter = LatexFormatter( 951 self.fmt, 952 longtable=longtable, 953 column_format=column_format, 954 multicolumn=multicolumn, 955 multicolumn_format=multicolumn_format, 956 multirow=multirow, 957 caption=caption, 958 label=label, 959 position=position, 960 ) 961 string = latex_formatter.to_string() 962 return save_to_buffer(string, buf=buf, encoding=encoding) 963 964 def to_html( 965 self, 966 buf: Optional[FilePathOrBuffer[str]] = None, 967 encoding: Optional[str] = None, 968 classes: Optional[Union[str, List, Tuple]] = None, 969 notebook: bool = False, 970 border: Optional[int] = None, 971 table_id: Optional[str] = None, 972 render_links: bool = False, 973 ) -> Optional[str]: 974 """ 975 Render a DataFrame to a html table. 976 977 Parameters 978 ---------- 979 buf : str, Path or StringIO-like, optional, default None 980 Buffer to write to. If None, the output is returned as a string. 981 encoding : str, default “utf-8” 982 Set character encoding. 983 classes : str or list-like 984 classes to include in the `class` attribute of the opening 985 ``<table>`` tag, in addition to the default "dataframe". 986 notebook : {True, False}, optional, default False 987 Whether the generated HTML is for IPython Notebook. 988 border : int 989 A ``border=border`` attribute is included in the opening 990 ``<table>`` tag. 
Default ``pd.options.display.html.border``. 991 table_id : str, optional 992 A css id is included in the opening `<table>` tag if specified. 993 render_links : bool, default False 994 Convert URLs to HTML links. 995 """ 996 from pandas.io.formats.html import HTMLFormatter, NotebookFormatter 997 998 Klass = NotebookFormatter if notebook else HTMLFormatter 999 1000 html_formatter = Klass( 1001 self.fmt, 1002 classes=classes, 1003 border=border, 1004 table_id=table_id, 1005 render_links=render_links, 1006 ) 1007 string = html_formatter.to_string() 1008 return save_to_buffer(string, buf=buf, encoding=encoding) 1009 1010 def to_string( 1011 self, 1012 buf: Optional[FilePathOrBuffer[str]] = None, 1013 encoding: Optional[str] = None, 1014 line_width: Optional[int] = None, 1015 ) -> Optional[str]: 1016 """ 1017 Render a DataFrame to a console-friendly tabular output. 1018 1019 Parameters 1020 ---------- 1021 buf : str, Path or StringIO-like, optional, default None 1022 Buffer to write to. If None, the output is returned as a string. 1023 encoding: str, default “utf-8” 1024 Set character encoding. 1025 line_width : int, optional 1026 Width to wrap a line in characters. 
1027 """ 1028 from pandas.io.formats.string import StringFormatter 1029 1030 string_formatter = StringFormatter(self.fmt, line_width=line_width) 1031 string = string_formatter.to_string() 1032 return save_to_buffer(string, buf=buf, encoding=encoding) 1033 1034 def to_csv( 1035 self, 1036 path_or_buf: Optional[FilePathOrBuffer[str]] = None, 1037 encoding: Optional[str] = None, 1038 sep: str = ",", 1039 columns: Optional[Sequence[Label]] = None, 1040 index_label: Optional[IndexLabel] = None, 1041 mode: str = "w", 1042 compression: CompressionOptions = "infer", 1043 quoting: Optional[int] = None, 1044 quotechar: str = '"', 1045 line_terminator: Optional[str] = None, 1046 chunksize: Optional[int] = None, 1047 date_format: Optional[str] = None, 1048 doublequote: bool = True, 1049 escapechar: Optional[str] = None, 1050 errors: str = "strict", 1051 storage_options: StorageOptions = None, 1052 ) -> Optional[str]: 1053 """ 1054 Render dataframe as comma-separated file. 1055 """ 1056 from pandas.io.formats.csvs import CSVFormatter 1057 1058 if path_or_buf is None: 1059 created_buffer = True 1060 path_or_buf = StringIO() 1061 else: 1062 created_buffer = False 1063 1064 csv_formatter = CSVFormatter( 1065 path_or_buf=path_or_buf, 1066 line_terminator=line_terminator, 1067 sep=sep, 1068 encoding=encoding, 1069 errors=errors, 1070 compression=compression, 1071 quoting=quoting, 1072 cols=columns, 1073 index_label=index_label, 1074 mode=mode, 1075 chunksize=chunksize, 1076 quotechar=quotechar, 1077 date_format=date_format, 1078 doublequote=doublequote, 1079 escapechar=escapechar, 1080 storage_options=storage_options, 1081 formatter=self.fmt, 1082 ) 1083 csv_formatter.save() 1084 1085 if created_buffer: 1086 assert isinstance(path_or_buf, StringIO) 1087 content = path_or_buf.getvalue() 1088 path_or_buf.close() 1089 return content 1090 1091 return None 1092 1093 1094def save_to_buffer( 1095 string: str, 1096 buf: Optional[FilePathOrBuffer[str]] = None, 1097 encoding: Optional[str] = 
None, 1098) -> Optional[str]: 1099 """ 1100 Perform serialization. Write to buf or return as string if buf is None. 1101 """ 1102 with get_buffer(buf, encoding=encoding) as f: 1103 f.write(string) 1104 if buf is None: 1105 return f.getvalue() 1106 return None 1107 1108 1109@contextmanager 1110def get_buffer(buf: Optional[FilePathOrBuffer[str]], encoding: Optional[str] = None): 1111 """ 1112 Context manager to open, yield and close buffer for filenames or Path-like 1113 objects, otherwise yield buf unchanged. 1114 """ 1115 if buf is not None: 1116 buf = stringify_path(buf) 1117 else: 1118 buf = StringIO() 1119 1120 if encoding is None: 1121 encoding = "utf-8" 1122 elif not isinstance(buf, str): 1123 raise ValueError("buf is not a file name and encoding is specified.") 1124 1125 if hasattr(buf, "write"): 1126 yield buf 1127 elif isinstance(buf, str): 1128 with open(buf, "w", encoding=encoding, newline="") as f: 1129 # GH#30034 open instead of codecs.open prevents a file leak 1130 # if we have an invalid encoding argument. 1131 # newline="" is needed to roundtrip correctly on 1132 # windows test_to_latex_filename 1133 yield f 1134 else: 1135 raise TypeError("buf is not a file name and it has no write method") 1136 1137 1138# ---------------------------------------------------------------------- 1139# Array formatters 1140 1141 1142def format_array( 1143 values: Any, 1144 formatter: Optional[Callable], 1145 float_format: Optional[FloatFormatType] = None, 1146 na_rep: str = "NaN", 1147 digits: Optional[int] = None, 1148 space: Optional[Union[str, int]] = None, 1149 justify: str = "right", 1150 decimal: str = ".", 1151 leading_space: Optional[bool] = True, 1152 quoting: Optional[int] = None, 1153) -> List[str]: 1154 """ 1155 Format an array for printing. 
def format_array(
    values: Any,
    formatter: Optional[Callable],
    float_format: Optional[FloatFormatType] = None,
    na_rep: str = "NaN",
    digits: Optional[int] = None,
    space: Optional[Union[str, int]] = None,
    justify: str = "right",
    decimal: str = ".",
    leading_space: Optional[bool] = True,
    quoting: Optional[int] = None,
) -> List[str]:
    """
    Format an array for printing.

    The formatter class is picked from ``values.dtype``; unset ``space``,
    ``float_format`` and ``digits`` fall back to the corresponding
    ``display.*`` options.

    Parameters
    ----------
    values
    formatter
    float_format
    na_rep
    digits
    space
    justify
    decimal
    leading_space : bool, optional, default True
        Whether the array should be formatted with a leading space.
        When an array as a column of a Series or DataFrame, we do want
        the leading space to pad between columns.

        When formatting an Index subclass
        (e.g. IntervalIndex._format_native_types), we don't want the
        leading space since it should be left-aligned.
    quoting
        Forwarded unchanged to the formatter class.

    Returns
    -------
    List[str]
    """
    dtype = values.dtype

    # Ordered (predicate, formatter class) pairs; the first match wins.
    dispatch = (
        (is_datetime64_dtype, Datetime64Formatter),
        (is_datetime64tz_dtype, Datetime64TZFormatter),
        (is_timedelta64_dtype, Timedelta64Formatter),
        (is_extension_array_dtype, ExtensionArrayFormatter),
        (lambda dt: is_float_dtype(dt) or is_complex_dtype(dt), FloatArrayFormatter),
        (is_integer_dtype, IntArrayFormatter),
    )
    fmt_klass: Type[GenericArrayFormatter] = next(
        (klass for matches, klass in dispatch if matches(dtype)),
        GenericArrayFormatter,
    )

    space = get_option("display.column_space") if space is None else space
    float_format = (
        get_option("display.float_format") if float_format is None else float_format
    )
    digits = get_option("display.precision") if digits is None else digits

    return fmt_klass(
        values,
        digits=digits,
        na_rep=na_rep,
        float_format=float_format,
        formatter=formatter,
        space=space,
        justify=justify,
        decimal=decimal,
        leading_space=leading_space,
        quoting=quoting,
    ).get_result()
class GenericArrayFormatter:
    """
    Base formatter turning the elements of ``values`` into strings.

    Subclasses override ``_format_strings``; ``get_result`` then justifies
    the produced strings to a common width.
    """

    def __init__(
        self,
        values: Any,
        digits: int = 7,
        formatter: Optional[Callable] = None,
        na_rep: str = "NaN",
        space: Union[str, int] = 12,
        float_format: Optional[FloatFormatType] = None,
        justify: str = "right",
        decimal: str = ".",
        quoting: Optional[int] = None,
        fixed_width: bool = True,
        leading_space: Optional[bool] = True,
    ) -> None:
        # All arguments are stored unchanged; interpretation happens in
        # ``_format_strings`` of this class or of a subclass.
        self.values = values
        self.digits = digits
        self.na_rep = na_rep
        self.space = space
        self.formatter = formatter
        self.float_format = float_format
        self.justify = justify
        self.decimal = decimal
        self.quoting = quoting
        self.fixed_width = fixed_width
        self.leading_space = leading_space

    def get_result(self) -> List[str]:
        """Format the values and justify them according to ``self.justify``."""
        fmt_values = self._format_strings()
        return _make_fixed_width(fmt_values, self.justify)

    def _format_strings(self) -> List[str]:
        """
        Return one string per element of ``self.values``.

        Non-missing floats go through the float formatter; everything else
        goes through ``self.formatter`` (or ``pprint_thing``), with missing
        scalars replaced by a dedicated representation.
        """
        # Float formatter precedence: explicit attribute, then the display
        # option, then a fixed-precision format with trailing zeros trimmed.
        if self.float_format is None:
            float_format = get_option("display.float_format")
            if float_format is None:
                precision = get_option("display.precision")
                float_format = lambda x: _trim_zeros_single_float(
                    f"{x: .{precision:d}f}"
                )
        else:
            float_format = self.float_format

        if self.formatter is not None:
            formatter = self.formatter
        else:
            # Strings are quoted for every quoting mode except QUOTE_NONE.
            quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE
            formatter = partial(
                pprint_thing,
                escape_chars=("\t", "\r", "\n"),
                quote_strings=quote_strings,
            )

        def _format(x):
            # Missing scalars: None, NA and NaT-likes keep their own text,
            # any other missing value is rendered as ``self.na_rep``.
            if self.na_rep is not None and is_scalar(x) and isna(x):
                try:
                    # try block for np.isnat specifically
                    # determine na_rep if x is None or NaT-like
                    if x is None:
                        return "None"
                    elif x is NA:
                        return str(NA)
                    elif x is NaT or np.isnat(x):
                        return "NaT"
                except (TypeError, ValueError):
                    # np.isnat only handles datetime or timedelta objects
                    pass
                return self.na_rep
            elif isinstance(x, PandasObject):
                return str(x)
            else:
                # object dtype
                return str(formatter(x))

        vals = extract_array(self.values, extract_numpy=True)

        # True where the element is a float and (for >1 dimensions) the
        # whole entry along the first axis contains no missing value.
        is_float_type = (
            lib.map_infer(vals, is_float)
            # vals may have 2 or more dimensions
            & np.all(notna(vals), axis=tuple(range(1, len(vals.shape))))
        )
        leading_space = self.leading_space
        if leading_space is None:
            # Not specified: pad only when at least one float is present.
            # NOTE(review): ``.any()`` presumably returns a NumPy boolean,
            # which is never identical to the builtin ``False`` tested
            # below - confirm before simplifying the branches.
            leading_space = is_float_type.any()

        fmt_values = []
        for i, v in enumerate(vals):
            if not is_float_type[i] and leading_space:
                fmt_values.append(f" {_format(v)}")
            elif is_float_type[i]:
                fmt_values.append(float_format(v))
            else:
                if leading_space is False:
                    # False specifically, so that the default is
                    # to include a space if we get here.
                    tpl = "{v}"
                else:
                    tpl = " {v}"
                fmt_values.append(tpl.format(v=_format(v)))

        return fmt_values
class FloatArrayFormatter(GenericArrayFormatter):
    """Formatter for float and complex arrays."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        # float_format is expected to be a string
        # formatter should be used to pass a function
        if self.float_format is not None and self.formatter is None:
            # GH21625, GH22270
            # A user-supplied float_format switches off the fixed-width
            # post-processing; a callable one is used as the formatter.
            self.fixed_width = False
            if callable(self.float_format):
                self.formatter = self.float_format
                self.float_format = None

    def _value_formatter(
        self,
        float_format: Optional[FloatFormatType] = None,
        threshold: Optional[Union[float, int]] = None,
    ) -> Callable:
        """
        Returns a function to be applied on each value to format it.

        Parameters
        ----------
        float_format : optional
            Overrides ``self.float_format`` when given.
        threshold : float or int, optional
            Values whose absolute value does not exceed it are formatted
            as ``0.0``.
        """
        # the float_format parameter supersedes self.float_format
        if float_format is None:
            float_format = self.float_format

        # we are going to compose different functions, to first convert to
        # a string, then replace the decimal symbol, and finally chop according
        # to the threshold

        # when there is no float_format, we use str instead of '%g'
        # because str(0.0) = '0.0' while '%g' % 0.0 = '0'
        if float_format:

            def base_formatter(v):
                assert float_format is not None  # for mypy
                # pandas\io\formats\format.py:1411: error: "str" not callable
                # [operator]

                # pandas\io\formats\format.py:1411: error: Unexpected keyword
                # argument "value" for "__call__" of "EngFormatter" [call-arg]
                return (
                    float_format(value=v)  # type: ignore[operator,call-arg]
                    if notna(v)
                    else self.na_rep
                )

        else:

            def base_formatter(v):
                return str(v) if notna(v) else self.na_rep

        if self.decimal != ".":

            def decimal_formatter(v):
                # Only the first "." is the decimal point.
                return base_formatter(v).replace(".", self.decimal, 1)

        else:
            decimal_formatter = base_formatter

        if threshold is None:
            return decimal_formatter

        def formatter(value):
            if notna(value):
                if abs(value) > threshold:
                    return decimal_formatter(value)
                else:
                    return decimal_formatter(0.0)
            else:
                return self.na_rep

        return formatter

    def get_result_as_array(self) -> np.ndarray:
        """
        Returns the float values converted into strings using
        the parameters given at initialisation, as a numpy array
        """

        def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
            # Apply ``formatter`` element-wise, substituting ``na_rep`` for
            # missing entries, and keep the input shape.
            mask = isna(values)
            formatted = np.array(
                [
                    formatter(val) if not m else na_rep
                    for val, m in zip(values.ravel(), mask.ravel())
                ]
            ).reshape(values.shape)
            return formatted

        # An explicit formatter bypasses all float-specific handling.
        if self.formatter is not None:
            return format_with_na_rep(self.values, self.formatter, self.na_rep)

        if self.fixed_width:
            threshold = get_option("display.chop_threshold")
        else:
            threshold = None

        # if we have a fixed_width, we'll need to try different float_format
        def format_values_with(float_format):
            formatter = self._value_formatter(float_format, threshold)

            # default formatter leaves a space to the left when formatting
            # floats, must be consistent for left-justifying NaNs (GH #25061)
            if self.justify == "left":
                na_rep = " " + self.na_rep
            else:
                na_rep = self.na_rep

            # separate the wheat from the chaff
            values = self.values
            is_complex = is_complex_dtype(values)
            values = format_with_na_rep(values, formatter, na_rep)

            if self.fixed_width:
                # Drop trailing zeros shared by the formatted numbers.
                if is_complex:
                    result = _trim_zeros_complex(values, self.decimal)
                else:
                    result = _trim_zeros_float(values, self.decimal)
                return np.asarray(result, dtype="object")

            return values

        # There is a special default string when we are fixed-width
        # The default is otherwise to use str instead of a formatting string
        float_format: Optional[FloatFormatType]
        if self.float_format is None:
            if self.fixed_width:
                if self.leading_space is True:
                    fmt_str = "{value: .{digits:d}f}"
                else:
                    fmt_str = "{value:.{digits:d}f}"
                float_format = partial(fmt_str.format, digits=self.digits)
            else:
                float_format = self.float_format
        else:
            # A string float_format is applied with the % operator.
            float_format = lambda value: self.float_format % value

        formatted_values = format_values_with(float_format)

        if not self.fixed_width:
            return formatted_values

        # we need to convert to engineering format if some values are too
        # small and would appear as 0, or if some values are too big and
        # take too much space

        if len(formatted_values) > 0:
            maxlen = max(len(x) for x in formatted_values)
            too_long = maxlen > self.digits + 6
        else:
            too_long = False

        with np.errstate(invalid="ignore"):
            abs_vals = np.abs(self.values)
            # this is pretty arbitrary for now
            # large values: more than 8 characters including decimal symbol
            # and first digit, hence > 1e6
            has_large_values = (abs_vals > 1e6).any()
            has_small_values = (
                (abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)
            ).any()

        if has_small_values or (too_long and has_large_values):
            # Second pass: redo the formatting in exponent notation.
            if self.leading_space is True:
                fmt_str = "{value: .{digits:d}e}"
            else:
                fmt_str = "{value:.{digits:d}e}"
            float_format = partial(fmt_str.format, digits=self.digits)
            formatted_values = format_values_with(float_format)

        return formatted_values

    def _format_strings(self) -> List[str]:
        """Return the formatted values as a list."""
        return list(self.get_result_as_array())
class IntArrayFormatter(GenericArrayFormatter):
    """Formatter for integer arrays."""

    def _format_strings(self) -> List[str]:
        """
        Format every value with ``self.formatter`` when set, otherwise as a
        decimal integer, with a sign placeholder space unless
        ``self.leading_space`` is exactly False.
        """
        # The f-string already renders the value; no further str.format
        # pass is needed on the result.
        if self.leading_space is False:
            formatter_str = lambda x: f"{x:d}"
        else:
            formatter_str = lambda x: f"{x: d}"
        formatter = self.formatter or formatter_str
        return [formatter(x) for x in self.values]


class Datetime64Formatter(GenericArrayFormatter):
    """Formatter for timezone-naive datetime64 values."""

    def __init__(
        self,
        values: Union[np.ndarray, "Series", DatetimeIndex, DatetimeArray],
        nat_rep: str = "NaT",
        date_format: Optional[str] = None,
        **kwargs,
    ):
        super().__init__(values, **kwargs)
        self.nat_rep = nat_rep
        self.date_format = date_format

    def _format_strings(self) -> List[str]:
        """Format the values; by definition we do NOT have a TZ here."""
        values = self.values

        if not isinstance(values, DatetimeIndex):
            values = DatetimeIndex(values)

        # A callable formatter takes precedence over the native formatting.
        if self.formatter is not None and callable(self.formatter):
            return [self.formatter(x) for x in values]

        fmt_values = values._data._format_native_types(
            na_rep=self.nat_rep, date_format=self.date_format
        )
        return fmt_values.tolist()


class ExtensionArrayFormatter(GenericArrayFormatter):
    """Formatter for extension arrays; delegates to ``format_array``."""

    def _format_strings(self) -> List[str]:
        """
        Convert the extension array to a NumPy array and format it through
        ``format_array``, using the array's own element formatter unless
        ``self.formatter`` is set.
        """
        values = extract_array(self.values, extract_numpy=True)

        formatter = self.formatter
        if formatter is None:
            formatter = values._formatter(boxed=True)

        if is_categorical_dtype(values.dtype):
            # Categorical is special for now, so that we can preserve tzinfo
            array = values._internal_get_values()
        else:
            array = np.asarray(values)

        return format_array(
            array,
            formatter,
            float_format=self.float_format,
            na_rep=self.na_rep,
            digits=self.digits,
            space=self.space,
            justify=self.justify,
            decimal=self.decimal,
            leading_space=self.leading_space,
            quoting=self.quoting,
        )
def format_percentiles(
    percentiles: Union[
        np.ndarray, List[Union[int, float]], List[float], List[Union[str, float]]
    ]
) -> List[str]:
    """
    Outputs rounded and formatted percentiles.

    Parameters
    ----------
    percentiles : list-like, containing floats from interval [0,1]

    Returns
    -------
    formatted : list of strings

    Raises
    ------
    ValueError
        If ``percentiles`` is not numeric or has entries outside [0, 1]
        (NaN included).

    Notes
    -----
    Rounding precision is chosen so that: (1) if any two elements of
    ``percentiles`` differ, they remain different after rounding
    (2) no entry is *rounded* to 0% or 100%.
    Any non-integer is always rounded to at least 1 decimal place.

    Examples
    --------
    Keeps all entries different after rounding:

    >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
    ['1.999%', '2.001%', '50%', '66.667%', '99.99%']

    No element is rounded to 0% or 100% (unless already equal to it).
    Duplicates are allowed:

    >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
    ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']

    Values that are whole percentages up to floating point error are
    shown as integers:

    >>> format_percentiles([0.29, 0.5])
    ['29%', '50%']
    """
    percentiles = np.asarray(percentiles)

    # The comparisons are False for NaN, so NaN is rejected here as well.
    with np.errstate(invalid="ignore"):
        if (
            not is_numeric_dtype(percentiles)
            or not np.all(percentiles >= 0)
            or not np.all(percentiles <= 1)
        ):
            raise ValueError("percentiles should all be in the interval [0,1]")

    if percentiles.size == 0:
        return []

    percentiles = 100 * percentiles

    unique_pcts = np.unique(percentiles)
    to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None
    to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None

    # Least precision that keeps percentiles unique after rounding
    prec = -np.floor(
        np.log10(np.min(np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end)))
    ).astype(int)
    prec = max(1, prec)

    # Round before truncating: 100 * 0.29 is 28.999999999999996, which a
    # bare astype(int) would turn into 28 and then fail the integer test.
    as_ints = percentiles.round(prec).astype(int)
    int_idx = np.isclose(as_ints, percentiles)

    if np.all(int_idx):
        return [i + "%" for i in as_ints.astype(str)]

    out = np.empty_like(percentiles, dtype=object)
    out[int_idx] = as_ints[int_idx].astype(str)
    out[~int_idx] = percentiles[~int_idx].round(prec).astype(str)
    return [i + "%" for i in out]
def is_dates_only(
    values: Union[np.ndarray, DatetimeArray, Index, DatetimeIndex]
) -> bool:
    """
    Return True if every non-NaT value falls on a day boundary and the
    values carry no timezone.
    """
    flat = values if isinstance(values, Index) else values.ravel()

    dti = DatetimeIndex(flat)
    if dti.tz is not None:
        return False

    nanos = dti.asi8
    nanos_per_day = int(86400 * 1e9)
    # NaT entries are ignored; anything else must be a whole number of days.
    has_time_of_day = np.logical_and(nanos != iNaT, nanos % nanos_per_day != 0)
    return not has_time_of_day.any()
def _format_datetime64(x: Union[NaTType, Timestamp], nat_rep: str = "NaT") -> str:
    """Return ``nat_rep`` for NaT, otherwise ``str(x)``."""
    return nat_rep if x is NaT else str(x)


def _format_datetime64_dateonly(
    x: Union[NaTType, Timestamp],
    nat_rep: str = "NaT",
    date_format: Optional[str] = None,
) -> str:
    """
    Format a timestamp without its time part.

    NaT becomes ``nat_rep``; otherwise ``date_format`` is applied through
    ``strftime`` when given, else the timestamp's date repr is used.
    """
    if x is NaT:
        return nat_rep
    return x.strftime(date_format) if date_format else x._date_repr


def get_format_datetime64(
    is_dates_only: bool, nat_rep: str = "NaT", date_format: Optional[str] = None
) -> Callable:
    """Return a one-argument formatter for datetime64 scalars."""
    if not is_dates_only:

        def format_full(x):
            return _format_datetime64(x, nat_rep=nat_rep)

        return format_full

    def format_date(x):
        return _format_datetime64_dateonly(
            x, nat_rep=nat_rep, date_format=date_format
        )

    return format_date


def get_format_datetime64_from_values(
    values: Union[np.ndarray, DatetimeArray, DatetimeIndex], date_format: Optional[str]
) -> Optional[str]:
    """ given values and a date_format, return a string format """
    multi_dim = isinstance(values, np.ndarray) and values.ndim > 1
    # We don't actually care about the order of values, and DatetimeIndex
    # only accepts 1D values
    flat = values.ravel() if multi_dim else values

    if not is_dates_only(flat):
        return date_format
    return date_format or "%Y-%m-%d"


class Datetime64TZFormatter(Datetime64Formatter):
    """Formatter for timezone-aware datetime64 values."""

    def _format_strings(self) -> List[str]:
        """ we by definition have a TZ """
        boxed = self.values.astype(object)
        dates_only = is_dates_only(boxed)
        render = self.formatter or get_format_datetime64(
            dates_only, date_format=self.date_format
        )
        return list(map(render, boxed))
class Timedelta64Formatter(GenericArrayFormatter):
    """Formatter for timedelta64 values."""

    def __init__(
        self,
        values: Union[np.ndarray, TimedeltaIndex],
        nat_rep: str = "NaT",
        box: bool = False,
        **kwargs,
    ):
        super().__init__(values, **kwargs)
        self.nat_rep = nat_rep
        self.box = box

    def _format_strings(self) -> List[str]:
        """Format each value with ``self.formatter`` or the shared default."""
        formatter = self.formatter or get_format_timedelta64(
            self.values, nat_rep=self.nat_rep, box=self.box
        )
        return [formatter(x) for x in self.values]


def get_format_timedelta64(
    values: Union[np.ndarray, TimedeltaIndex, TimedeltaArray],
    nat_rep: str = "NaT",
    box: bool = False,
) -> Callable:
    """
    Return a formatter function for a range of timedeltas.
    These will all have the same format argument

    If box, then show the return in quotes
    """
    values_int = values.astype(np.int64)

    consider_values = values_int != iNaT

    # Integer constant on purpose: a float divisor would convert the int64
    # nanoseconds to float64 and lose precision above 2**53.
    one_day_nanos = 86400 * 10 ** 9
    even_days = (
        np.logical_and(consider_values, values_int % one_day_nanos != 0).sum() == 0
    )

    # Whole-day values use the default repr, anything else the long one.
    repr_format = None if even_days else "long"

    def _formatter(x):
        if x is None or (is_scalar(x) and isna(x)):
            return nat_rep

        if not isinstance(x, Timedelta):
            x = Timedelta(x)
        result = x._repr_base(format=repr_format)
        if box:
            result = f"'{result}'"
        return result

    return _formatter


def _make_fixed_width(
    strings: List[str],
    justify: str = "right",
    minimum: Optional[int] = None,
    adj: Optional[TextAdjustment] = None,
) -> List[str]:
    """
    Truncate and justify ``strings`` to a common width.

    The width is the longest string (at least ``minimum`` when given),
    capped by the ``display.max_colwidth`` option. Strings longer than the
    cap are cut and end in ``...``. Empty input and ``justify == "all"``
    are returned unchanged.
    """
    if len(strings) == 0 or justify == "all":
        return strings

    adjustment = get_adjustment() if adj is None else adj

    max_len = max(adjustment.len(x) for x in strings)

    if minimum is not None:
        max_len = max(minimum, max_len)

    conf_max = get_option("display.max_colwidth")
    if conf_max is not None and max_len > conf_max:
        max_len = conf_max

    def just(x: str) -> str:
        # Truncation needs room for the three-dot ellipsis.
        if conf_max is not None and conf_max > 3 and adjustment.len(x) > max_len:
            x = x[: max_len - 3] + "..."
        return x

    strings = [just(x) for x in strings]
    return adjustment.justify(strings, max_len, mode=justify)
1803 return x 1804 1805 strings = [just(x) for x in strings] 1806 result = adjustment.justify(strings, max_len, mode=justify) 1807 return result 1808 1809 1810def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> List[str]: 1811 """ 1812 Separates the real and imaginary parts from the complex number, and 1813 executes the _trim_zeros_float method on each of those. 1814 """ 1815 trimmed = [ 1816 "".join(_trim_zeros_float(re.split(r"([j+-])", x), decimal)) 1817 for x in str_complexes 1818 ] 1819 1820 # pad strings to the length of the longest trimmed string for alignment 1821 lengths = [len(s) for s in trimmed] 1822 max_length = max(lengths) 1823 padded = [ 1824 s[: -((k - 1) // 2 + 1)] # real part 1825 + (max_length - k) // 2 * "0" 1826 + s[-((k - 1) // 2 + 1) : -((k - 1) // 2)] # + / - 1827 + s[-((k - 1) // 2) : -1] # imaginary part 1828 + (max_length - k) // 2 * "0" 1829 + s[-1] 1830 for s, k in zip(trimmed, lengths) 1831 ] 1832 return padded 1833 1834 1835def _trim_zeros_single_float(str_float: str) -> str: 1836 """ 1837 Trims trailing zeros after a decimal point, 1838 leaving just one if necessary. 1839 """ 1840 str_float = str_float.rstrip("0") 1841 if str_float.endswith("."): 1842 str_float += "0" 1843 1844 return str_float 1845 1846 1847def _trim_zeros_float( 1848 str_floats: Union[np.ndarray, List[str]], decimal: str = "." 1849) -> List[str]: 1850 """ 1851 Trims the maximum number of trailing zeros equally from 1852 all numbers containing decimals, leaving just one if 1853 necessary. 1854 """ 1855 trimmed = str_floats 1856 number_regex = re.compile(fr"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$") 1857 1858 def is_number_with_decimal(x): 1859 return re.match(number_regex, x) is not None 1860 1861 def should_trim(values: Union[np.ndarray, List[str]]) -> bool: 1862 """ 1863 Determine if an array of strings should be trimmed. 
1864 1865 Returns True if all numbers containing decimals (defined by the 1866 above regular expression) within the array end in a zero, otherwise 1867 returns False. 1868 """ 1869 numbers = [x for x in values if is_number_with_decimal(x)] 1870 return len(numbers) > 0 and all(x.endswith("0") for x in numbers) 1871 1872 while should_trim(trimmed): 1873 trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed] 1874 1875 # leave one 0 after the decimal points if need be. 1876 result = [ 1877 x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x 1878 for x in trimmed 1879 ] 1880 return result 1881 1882 1883def _has_names(index: Index) -> bool: 1884 if isinstance(index, MultiIndex): 1885 return com.any_not_none(*index.names) 1886 else: 1887 return index.name is not None 1888 1889 1890class EngFormatter: 1891 """ 1892 Formats float values according to engineering format. 1893 1894 Based on matplotlib.ticker.EngFormatter 1895 """ 1896 1897 # The SI engineering prefixes 1898 ENG_PREFIXES = { 1899 -24: "y", 1900 -21: "z", 1901 -18: "a", 1902 -15: "f", 1903 -12: "p", 1904 -9: "n", 1905 -6: "u", 1906 -3: "m", 1907 0: "", 1908 3: "k", 1909 6: "M", 1910 9: "G", 1911 12: "T", 1912 15: "P", 1913 18: "E", 1914 21: "Z", 1915 24: "Y", 1916 } 1917 1918 def __init__(self, accuracy: Optional[int] = None, use_eng_prefix: bool = False): 1919 self.accuracy = accuracy 1920 self.use_eng_prefix = use_eng_prefix 1921 1922 def __call__(self, num: Union[int, float]) -> str: 1923 """ 1924 Formats a number in engineering notation, appending a letter 1925 representing the power of 1000 of the original number. 
Some examples: 1926 1927 >>> format_eng(0) # for self.accuracy = 0 1928 ' 0' 1929 1930 >>> format_eng(1000000) # for self.accuracy = 1, 1931 # self.use_eng_prefix = True 1932 ' 1.0M' 1933 1934 >>> format_eng("-1e-6") # for self.accuracy = 2 1935 # self.use_eng_prefix = False 1936 '-1.00E-06' 1937 1938 @param num: the value to represent 1939 @type num: either a numeric value or a string that can be converted to 1940 a numeric value (as per decimal.Decimal constructor) 1941 1942 @return: engineering formatted string 1943 """ 1944 dnum = decimal.Decimal(str(num)) 1945 1946 if decimal.Decimal.is_nan(dnum): 1947 return "NaN" 1948 1949 if decimal.Decimal.is_infinite(dnum): 1950 return "inf" 1951 1952 sign = 1 1953 1954 if dnum < 0: # pragma: no cover 1955 sign = -1 1956 dnum = -dnum 1957 1958 if dnum != 0: 1959 pow10 = decimal.Decimal(int(math.floor(dnum.log10() / 3) * 3)) 1960 else: 1961 pow10 = decimal.Decimal(0) 1962 1963 pow10 = pow10.min(max(self.ENG_PREFIXES.keys())) 1964 pow10 = pow10.max(min(self.ENG_PREFIXES.keys())) 1965 int_pow10 = int(pow10) 1966 1967 if self.use_eng_prefix: 1968 prefix = self.ENG_PREFIXES[int_pow10] 1969 else: 1970 if int_pow10 < 0: 1971 prefix = f"E-{-int_pow10:02d}" 1972 else: 1973 prefix = f"E+{int_pow10:02d}" 1974 1975 mant = sign * dnum / (10 ** pow10) 1976 1977 if self.accuracy is None: # pragma: no cover 1978 format_str = "{mant: g}{prefix}" 1979 else: 1980 format_str = f"{{mant: .{self.accuracy:d}f}}{{prefix}}" 1981 1982 formatted = format_str.format(mant=mant, prefix=prefix) 1983 1984 return formatted 1985 1986 1987def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None: 1988 """ 1989 Alter default behavior on how float is formatted in DataFrame. 1990 Format float in engineering format. By accuracy, we mean the number of 1991 decimal digits after the floating point. 1992 1993 See also EngFormatter. 
def get_level_lengths(
    levels: Any, sentinel: Union[bool, object, str] = ""
) -> List[Dict[int, int]]:
    """
    For each index in each level the function returns lengths of indexes.

    Parameters
    ----------
    levels : list of lists
        List of values for each level.
    sentinel : string, optional
        Value which states that no new index starts there.

    Returns
    -------
    Returns list of maps. For each level returns map of indexes (key is index
    in row and value is length of index).
    """
    if len(levels) == 0:
        return []

    # control[i] stays True while position i has held the sentinel in
    # every level seen so far; only then may it extend the previous index.
    control = [True] * len(levels[0])

    result = []
    for level in levels:
        last_index = 0

        lengths = {}
        for i, key in enumerate(level):
            if control[i] and key == sentinel:
                pass
            else:
                control[i] = False
                lengths[last_index] = i - last_index
                last_index = i

        lengths[last_index] = len(level) - last_index

        result.append(lengths)

    return result


def buffer_put_lines(buf: IO[str], lines: List[str]) -> None:
    """
    Appends lines to a buffer.

    Parameters
    ----------
    buf
        The buffer to write to
    lines
        The lines to append. Items that are not strings are converted
        with ``str``.
    """
    # Convert unconditionally: str() is a no-op for strings, and join()
    # would fail on any non-string item.
    buf.write("\n".join(str(x) for x in lines))