1"""
2Internal module for formatting output data in csv, html,
3and latex files. This module also applies to display formatting.
4"""
5
6from contextlib import contextmanager
7from csv import QUOTE_NONE, QUOTE_NONNUMERIC
8import decimal
9from functools import partial
10from io import StringIO
11import math
12import re
13from shutil import get_terminal_size
14from typing import (
15    IO,
16    TYPE_CHECKING,
17    Any,
18    Callable,
19    Dict,
20    Iterable,
21    List,
22    Mapping,
23    Optional,
24    Sequence,
25    Tuple,
26    Type,
27    Union,
28    cast,
29)
30from unicodedata import east_asian_width
31
32import numpy as np
33
34from pandas._config.config import get_option, set_option
35
36from pandas._libs import lib
37from pandas._libs.missing import NA
38from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
39from pandas._libs.tslibs.nattype import NaTType
40from pandas._typing import (
41    ArrayLike,
42    CompressionOptions,
43    FilePathOrBuffer,
44    FloatFormatType,
45    IndexLabel,
46    Label,
47    StorageOptions,
48)
49
50from pandas.core.dtypes.common import (
51    is_categorical_dtype,
52    is_complex_dtype,
53    is_datetime64_dtype,
54    is_datetime64tz_dtype,
55    is_extension_array_dtype,
56    is_float,
57    is_float_dtype,
58    is_integer,
59    is_integer_dtype,
60    is_list_like,
61    is_numeric_dtype,
62    is_scalar,
63    is_timedelta64_dtype,
64)
65from pandas.core.dtypes.missing import isna, notna
66
67from pandas.core.arrays.datetimes import DatetimeArray
68from pandas.core.arrays.timedeltas import TimedeltaArray
69from pandas.core.base import PandasObject
70import pandas.core.common as com
71from pandas.core.construction import extract_array
72from pandas.core.indexes.api import Index, MultiIndex, PeriodIndex, ensure_index
73from pandas.core.indexes.datetimes import DatetimeIndex
74from pandas.core.indexes.timedeltas import TimedeltaIndex
75from pandas.core.reshape.concat import concat
76
77from pandas.io.common import stringify_path
78from pandas.io.formats.printing import adjoin, justify, pprint_thing
79
80if TYPE_CHECKING:
81    from pandas import Categorical, DataFrame, Series
82
83
# Per-column formatter callables: either positional (list/tuple, one entry
# per column) or a mapping keyed by column name/position.
FormattersType = Union[
    List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable]
]
# Normalized column-width specification: column label -> width.
ColspaceType = Mapping[Label, Union[str, int]]
# Column-width argument as accepted from callers: a single width, a sequence
# of widths, or a label -> width mapping.
ColspaceArgType = Union[
    str, int, Sequence[Union[str, int]], Mapping[Label, Union[str, int]]
]
91
# Shared "Parameters" section; it is appended to ``DataFrameFormatter.__doc__``
# further down in this module. The ``%(...)s`` fields (col_space_type,
# col_space, header_type, header) are left as placeholders in this template.
common_docstring = """
        Parameters
        ----------
        buf : str, Path or StringIO-like, optional, default None
            Buffer to write to. If None, the output is returned as a string.
        columns : sequence, optional, default None
            The subset of columns to write. Writes all columns by default.
        col_space : %(col_space_type)s, optional
            %(col_space)s.
        header : %(header_type)s, optional
            %(header)s.
        index : bool, optional, default True
            Whether to print index (row) labels.
        na_rep : str, optional, default 'NaN'
            String representation of ``NaN`` to use.
        formatters : list, tuple or dict of one-param. functions, optional
            Formatter functions to apply to columns' elements by position or
            name.
            The result of each function must be a unicode string.
            List/tuple must be of length equal to the number of columns.
        float_format : one-parameter function, optional, default None
            Formatter function to apply to columns' elements if they are
            floats. This function must return a unicode string and will be
            applied only to the non-``NaN`` elements, with ``NaN`` being
            handled by ``na_rep``.

            .. versionchanged:: 1.2.0

        sparsify : bool, optional, default True
            Set to False for a DataFrame with a hierarchical index to print
            every multiindex key at each row.
        index_names : bool, optional, default True
            Prints the names of the indexes.
        justify : str, default None
            How to justify the column labels. If None uses the option from
            the print configuration (controlled by set_option), 'right' out
            of the box. Valid values are

            * left
            * right
            * center
            * justify
            * justify-all
            * start
            * end
            * inherit
            * match-parent
            * initial
            * unset.
        max_rows : int, optional
            Maximum number of rows to display in the console.
        min_rows : int, optional
            The number of rows to display in the console in a truncated repr
            (when number of rows is above `max_rows`).
        max_cols : int, optional
            Maximum number of columns to display in the console.
        show_dimensions : bool, default False
            Display DataFrame dimensions (number of rows by number of columns).
        decimal : str, default '.'
            Character recognized as decimal separator, e.g. ',' in Europe.
    """
153
# Recognised ``justify`` values; the same list appears under ``justify`` in
# ``common_docstring``.
_VALID_JUSTIFY_PARAMETERS = (
    "left",
    "right",
    "center",
    "justify",
    "justify-all",
    "start",
    "end",
    "inherit",
    "match-parent",
    "initial",
    "unset",
)
167
# Shared "Returns" section; appended to ``DataFrameFormatter.__doc__`` right
# after ``common_docstring``.
return_docstring = """
        Returns
        -------
        str or None
            If buf is None, returns the result as a string. Otherwise returns
            None.
    """
175
176
class CategoricalFormatter:
    """
    Render a ``Categorical`` as text: bracketed values plus an optional footer.

    Parameters
    ----------
    categorical : Categorical
        The values to render.
    buf : IO[str], optional
        Stored on the instance; a new ``StringIO`` is created when omitted.
    length : bool, default True
        Whether the footer starts with a ``Length: <n>`` line.
    na_rep : str, default "NaN"
        Forwarded to ``format_array`` as the missing-value text.
    footer : bool, default True
        Whether ``to_string`` appends the footer at all.
    """

    def __init__(
        self,
        categorical: "Categorical",
        buf: Optional[IO[str]] = None,
        length: bool = True,
        na_rep: str = "NaN",
        footer: bool = True,
    ):
        self.categorical = categorical
        self.buf = buf if buf is not None else StringIO("")
        self.na_rep = na_rep
        self.length = length
        self.footer = footer
        self.quoting = QUOTE_NONNUMERIC

    def _get_footer(self) -> str:
        """
        Return the footer text.

        The footer is the optional ``Length: <n>`` line followed, on its own
        line, by the categories summary reported by the categorical.
        """
        lines = []
        if self.length:
            lines.append(f"Length: {len(self.categorical)}")
        # Category info always goes last, on a line of its own.
        lines.append(self.categorical._repr_categories_info())
        return "\n".join(lines)

    def _get_formatted_values(self) -> List[str]:
        """Return the values formatted as strings via ``format_array``."""
        return format_array(
            self.categorical._internal_get_values(),
            None,
            float_format=None,
            na_rep=self.na_rep,
            quoting=self.quoting,
        )

    def to_string(self) -> str:
        """
        Return the full text representation.

        An empty categorical yields only the footer (or an empty string when
        the footer is disabled).
        """
        categorical = self.categorical

        # Explicit length check: truthiness of an array-like is not reliable.
        if len(categorical) == 0:
            return self._get_footer() if self.footer else ""

        values = ", ".join(v.strip() for v in self._get_formatted_values())
        lines = [f"[{values}]"]
        if self.footer:
            footer = self._get_footer()
            if footer:
                lines.append(footer)

        return "\n".join(lines)
239
240
class SeriesFormatter:
    """
    Build the text representation of a Series.

    The constructor stores the display options and immediately computes the
    (possibly truncated) series to render; ``to_string`` produces the text.

    Parameters
    ----------
    series : Series
        The series to render.
    buf : IO[str], optional
        Stored on the instance; a new ``StringIO`` is created when omitted.
    length : bool or "truncate", default True
        Whether the footer reports the length; ``"truncate"`` reports it only
        when rows were dropped.
    header : bool, default True
        Whether to emit the index header line (when the index is named).
    index : bool, default True
        Whether to print the index labels.
    na_rep : str, default "NaN"
        Forwarded to ``format_array`` as the missing-value text.
    name : bool, default False
        Whether the footer includes the series name.
    float_format : optional
        Forwarded to ``format_array``; falls back to the
        ``display.float_format`` option when None.
    dtype : bool, default True
        Whether the footer includes the dtype name.
    max_rows : int, optional
        Row count above which the series is truncated.
    min_rows : int, optional
        Number of rows kept once truncation applies (capped at ``max_rows``).
    """

    def __init__(
        self,
        series: "Series",
        buf: Optional[IO[str]] = None,
        length: Union[bool, str] = True,
        header: bool = True,
        index: bool = True,
        na_rep: str = "NaN",
        name: bool = False,
        float_format: Optional[str] = None,
        dtype: bool = True,
        max_rows: Optional[int] = None,
        min_rows: Optional[int] = None,
    ):
        self.series = series
        self.buf = buf if buf is not None else StringIO()
        self.name = name
        self.na_rep = na_rep
        self.header = header
        self.length = length
        self.index = index
        self.max_rows = max_rows
        self.min_rows = min_rows

        if float_format is None:
            float_format = get_option("display.float_format")
        self.float_format = float_format
        self.dtype = dtype
        self.adj = get_adjustment()

        self._chk_truncate()

    def _chk_truncate(self) -> None:
        """
        Decide whether the series is truncated and build ``tr_series``.

        Sets ``tr_series`` (the rows to render), ``tr_row_num`` (number of
        leading rows kept, or None when not truncated) and
        ``is_truncated_vertically`` (always a real bool).
        """
        self.tr_row_num: Optional[int]

        min_rows = self.min_rows
        max_rows = self.max_rows
        series = self.series
        # Truncation is decided by max_rows; how many rows survive is then
        # decided by min_rows below. Normalised to bool so the attribute never
        # leaks None/0.
        is_truncated_vertically = bool(max_rows and len(series) > max_rows)
        if is_truncated_vertically:
            max_rows = cast(int, max_rows)
            if min_rows:
                # min_rows set (neither None nor 0): keep the smaller of both
                max_rows = min(min_rows, max_rows)
            if max_rows == 1:
                row_num = max_rows
                series = series.iloc[:max_rows]
            else:
                # keep the same number of rows from the head and the tail
                row_num = max_rows // 2
                series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
            self.tr_row_num = row_num
        else:
            self.tr_row_num = None
        self.tr_series = series
        self.is_truncated_vertically = is_truncated_vertically

    def _get_footer(self) -> str:
        """
        Return the footer line(s).

        The first line is a comma-separated list of the enabled items (index
        frequency, name, length, dtype); for categorical data the categories
        summary follows on a new line.
        """
        parts: List[str] = []
        name = self.series.name
        index = self.series.index

        if getattr(index, "freq", None) is not None:
            assert isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex))
            parts.append(f"Freq: {index.freqstr}")

        if self.name is not False and name is not None:
            series_name = pprint_thing(name, escape_chars=("\t", "\r", "\n"))
            parts.append(f"Name: {series_name}")

        if self.length is True or (
            self.length == "truncate" and self.is_truncated_vertically
        ):
            parts.append(f"Length: {len(self.series)}")

        if self.dtype is not False and self.dtype is not None:
            dtype_name = getattr(self.tr_series.dtype, "name", None)
            if dtype_name:
                parts.append(f"dtype: {pprint_thing(dtype_name)}")

        footer = ", ".join(parts)

        # Category info goes last and on its own line, as it does for
        # Categoricals.
        if is_categorical_dtype(self.tr_series.dtype):
            level_info = self.tr_series._values._repr_categories_info()
            if footer:
                footer += "\n"
            footer += level_info

        return footer

    def _get_formatted_index(self) -> Tuple[List[str], bool]:
        """
        Return the formatted index and whether it carries a header.

        The formatted list is requested with the index name(s) included; the
        boolean tells whether any name is actually set.
        """
        index = self.tr_series.index

        if isinstance(index, MultiIndex):
            have_header = any(name for name in index.names)
            fmt_index = index.format(names=True)
        else:
            have_header = index.name is not None
            fmt_index = index.format(name=True)
        return fmt_index, have_header

    def _get_formatted_values(self) -> List[str]:
        """Return the (truncated) values formatted as strings."""
        return format_array(
            self.tr_series._values,
            None,
            float_format=self.float_format,
            na_rep=self.na_rep,
            leading_space=self.index,
        )

    def to_string(self) -> str:
        """Return the complete text representation of the series."""
        series = self.tr_series
        footer = self._get_footer()

        # Explicit length check: Series truthiness is ambiguous.
        if len(series) == 0:
            return f"{type(self.series).__name__}([], {footer})"

        fmt_index, have_header = self._get_formatted_index()
        fmt_values = self._get_formatted_values()

        if self.is_truncated_vertically:
            row_num = cast(int, self.tr_row_num)
            width = self.adj.len(fmt_values[row_num - 1])
            dot_str = "..." if width > 3 else ".."
            # Series uses mode=center because it has single value columns
            # DataFrame uses mode=left
            dot_str = self.adj.justify([dot_str], width, mode="center")[0]
            fmt_values.insert(row_num, dot_str)
            # fmt_index[0] is the header slot, hence the +1 offset
            fmt_index.insert(row_num + 1, "")

        if self.index:
            result = self.adj.adjoin(3, fmt_index[1:], fmt_values)
        else:
            result = self.adj.adjoin(3, fmt_values)

        if self.header and have_header:
            result = fmt_index[0] + "\n" + result

        if footer:
            result += "\n" + footer

        return result
399
400
class TextAdjustment:
    """Measure and pad text where every character occupies one column."""

    def __init__(self):
        self.encoding = get_option("display.encoding")

    def len(self, text: str) -> int:
        """Return the display width of ``text``, i.e. its ``len``."""
        width = len(text)
        return width

    def justify(self, texts: Any, max_len: int, mode: str = "right") -> List[str]:
        """Pad ``texts`` to ``max_len`` using the module-level ``justify``."""
        padded = justify(texts, max_len, mode=mode)
        return padded

    def adjoin(self, space: int, *lists, **kwargs) -> str:
        """
        Join ``lists`` side by side via the module-level ``adjoin``.

        This object's ``len`` and ``justify`` are supplied as the width and
        padding callbacks.
        """
        joined = adjoin(
            space, *lists, strlen=self.len, justfunc=self.justify, **kwargs
        )
        return joined
413
414
class EastAsianTextAdjustment(TextAdjustment):
    """Text adjustment that accounts for Unicode East Asian Width."""

    def __init__(self):
        super().__init__()
        # Width given to "ambiguous" characters is driven by an option.
        wide = get_option("display.unicode.ambiguous_as_wide")
        self.ambiguous_width = 2 if wide else 1

        # Definition of East Asian Width
        # https://unicode.org/reports/tr11/
        # Categories missing from this map fall back to ``ambiguous_width``.
        self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}

    def len(self, text: str) -> int:
        """
        Calculate display width considering unicode East Asian Width
        """
        if isinstance(text, str):
            widths = (
                self._EAW_MAP.get(east_asian_width(char), self.ambiguous_width)
                for char in text
            )
            return sum(widths)
        # non-string input: fall back to the plain length
        return len(text)

    def justify(
        self, texts: Iterable[str], max_len: int, mode: str = "right"
    ) -> List[str]:
        """
        Pad each text so that its *display* width equals ``max_len``.

        ``mode`` is "left" or "center"; any other value right-justifies.
        """
        # Pick the padding method by name; anything unknown right-justifies.
        method_name = {"left": "ljust", "center": "center"}.get(mode, "rjust")

        padded = []
        for text in texts:
            # The str padding methods count characters, so widen the target by
            # the difference between character count and display width.
            target = max_len - self.len(text) + len(text)
            padded.append(getattr(text, method_name)(target))
        return padded
452
453
def get_adjustment() -> TextAdjustment:
    """
    Return the text-adjustment helper selected by the display options.

    ``EastAsianTextAdjustment`` is used when the
    ``display.unicode.east_asian_width`` option is truthy, otherwise the plain
    ``TextAdjustment``.
    """
    adjustment_cls = (
        EastAsianTextAdjustment
        if get_option("display.unicode.east_asian_width")
        else TextAdjustment
    )
    return adjustment_cls()
460
461
class DataFrameFormatter:
    """Class for processing dataframe formatting options and data."""

    # Extend the class docstring above with the shared parameter and return
    # sections (falling back to an empty string if no docstring is present).
    __doc__ = __doc__ if __doc__ else ""
    __doc__ += common_docstring + return_docstring
467
    def __init__(
        self,
        frame: "DataFrame",
        columns: Optional[Sequence[str]] = None,
        col_space: Optional[ColspaceArgType] = None,
        header: Union[bool, Sequence[str]] = True,
        index: bool = True,
        na_rep: str = "NaN",
        formatters: Optional[FormattersType] = None,
        justify: Optional[str] = None,
        float_format: Optional[FloatFormatType] = None,
        sparsify: Optional[bool] = None,
        index_names: bool = True,
        max_rows: Optional[int] = None,
        min_rows: Optional[int] = None,
        max_cols: Optional[int] = None,
        show_dimensions: Union[bool, str] = False,
        decimal: str = ".",
        bold_rows: bool = False,
        escape: bool = True,
    ):
        """
        Store the formatting options and pre-compute the truncated frame.

        The statement order below matters: several initializers read state
        set by earlier ones.
        """
        self.frame = frame
        # May replace self.frame with the selected column subset, so it has
        # to run before the initializers that read self.frame.columns.
        self.columns = self._initialize_columns(columns)
        self.col_space = self._initialize_colspace(col_space)
        self.header = header
        self.index = index
        self.na_rep = na_rep
        self.formatters = self._initialize_formatters(formatters)
        self.justify = self._initialize_justify(justify)
        self.float_format = float_format
        self.sparsify = self._initialize_sparsify(sparsify)
        self.show_index_names = index_names
        self.decimal = decimal
        self.bold_rows = bold_rows
        self.escape = escape
        self.max_rows = max_rows
        self.min_rows = min_rows
        self.max_cols = max_cols
        self.show_dimensions = show_dimensions

        # The fitted limits read max_rows/min_rows/max_cols (and, in terminal
        # mode, header/show_dimensions), so they are computed after those.
        self.max_cols_fitted = self._calc_max_cols_fitted()
        self.max_rows_fitted = self._calc_max_rows_fitted()

        # tr_frame starts as the full frame; truncate() slices it when the
        # fitted limits are exceeded.
        self.tr_frame = self.frame
        self.truncate()
        self.adj = get_adjustment()
514
515    def get_strcols(self) -> List[List[str]]:
516        """
517        Render a DataFrame to a list of columns (as lists of strings).
518        """
519        strcols = self._get_strcols_without_index()
520
521        if self.index:
522            str_index = self._get_formatted_index(self.tr_frame)
523            strcols.insert(0, str_index)
524
525        return strcols
526
527    @property
528    def should_show_dimensions(self) -> bool:
529        return self.show_dimensions is True or (
530            self.show_dimensions == "truncate" and self.is_truncated
531        )
532
533    @property
534    def is_truncated(self) -> bool:
535        return bool(self.is_truncated_horizontally or self.is_truncated_vertically)
536
537    @property
538    def is_truncated_horizontally(self) -> bool:
539        return bool(self.max_cols_fitted and (len(self.columns) > self.max_cols_fitted))
540
541    @property
542    def is_truncated_vertically(self) -> bool:
543        return bool(self.max_rows_fitted and (len(self.frame) > self.max_rows_fitted))
544
545    @property
546    def dimensions_info(self) -> str:
547        return f"\n\n[{len(self.frame)} rows x {len(self.frame.columns)} columns]"
548
549    @property
550    def has_index_names(self) -> bool:
551        return _has_names(self.frame.index)
552
553    @property
554    def has_column_names(self) -> bool:
555        return _has_names(self.frame.columns)
556
557    @property
558    def show_row_idx_names(self) -> bool:
559        return all((self.has_index_names, self.index, self.show_index_names))
560
561    @property
562    def show_col_idx_names(self) -> bool:
563        return all((self.has_column_names, self.show_index_names, self.header))
564
565    @property
566    def max_rows_displayed(self) -> int:
567        return min(self.max_rows or len(self.frame), len(self.frame))
568
569    def _initialize_sparsify(self, sparsify: Optional[bool]) -> bool:
570        if sparsify is None:
571            return get_option("display.multi_sparse")
572        return sparsify
573
574    def _initialize_formatters(
575        self, formatters: Optional[FormattersType]
576    ) -> FormattersType:
577        if formatters is None:
578            return {}
579        elif len(self.frame.columns) == len(formatters) or isinstance(formatters, dict):
580            return formatters
581        else:
582            raise ValueError(
583                f"Formatters length({len(formatters)}) should match "
584                f"DataFrame number of columns({len(self.frame.columns)})"
585            )
586
587    def _initialize_justify(self, justify: Optional[str]) -> str:
588        if justify is None:
589            return get_option("display.colheader_justify")
590        else:
591            return justify
592
593    def _initialize_columns(self, columns: Optional[Sequence[str]]) -> Index:
594        if columns is not None:
595            cols = ensure_index(columns)
596            self.frame = self.frame[cols]
597            return cols
598        else:
599            return self.frame.columns
600
601    def _initialize_colspace(
602        self, col_space: Optional[ColspaceArgType]
603    ) -> ColspaceType:
604        result: ColspaceType
605
606        if col_space is None:
607            result = {}
608        elif isinstance(col_space, (int, str)):
609            result = {"": col_space}
610            result.update({column: col_space for column in self.frame.columns})
611        elif isinstance(col_space, Mapping):
612            for column in col_space.keys():
613                if column not in self.frame.columns and column != "":
614                    raise ValueError(
615                        f"Col_space is defined for an unknown column: {column}"
616                    )
617            result = col_space
618        else:
619            if len(self.frame.columns) != len(col_space):
620                raise ValueError(
621                    f"Col_space length({len(col_space)}) should match "
622                    f"DataFrame number of columns({len(self.frame.columns)})"
623                )
624            result = dict(zip(self.frame.columns, col_space))
625        return result
626
627    def _calc_max_cols_fitted(self) -> Optional[int]:
628        """Number of columns fitting the screen."""
629        if not self._is_in_terminal():
630            return self.max_cols
631
632        width, _ = get_terminal_size()
633        if self._is_screen_narrow(width):
634            return width
635        else:
636            return self.max_cols
637
638    def _calc_max_rows_fitted(self) -> Optional[int]:
639        """Number of rows with data fitting the screen."""
640        max_rows: Optional[int]
641
642        if self._is_in_terminal():
643            _, height = get_terminal_size()
644            if self.max_rows == 0:
645                # rows available to fill with actual data
646                return height - self._get_number_of_auxillary_rows()
647
648            if self._is_screen_short(height):
649                max_rows = height
650            else:
651                max_rows = self.max_rows
652        else:
653            max_rows = self.max_rows
654
655        return self._adjust_max_rows(max_rows)
656
657    def _adjust_max_rows(self, max_rows: Optional[int]) -> Optional[int]:
658        """Adjust max_rows using display logic.
659
660        See description here:
661        https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options
662
663        GH #37359
664        """
665        if max_rows:
666            if (len(self.frame) > max_rows) and self.min_rows:
667                # if truncated, set max_rows showed to min_rows
668                max_rows = min(self.min_rows, max_rows)
669        return max_rows
670
671    def _is_in_terminal(self) -> bool:
672        """Check if the output is to be shown in terminal."""
673        return bool(self.max_cols == 0 or self.max_rows == 0)
674
675    def _is_screen_narrow(self, max_width) -> bool:
676        return bool(self.max_cols == 0 and len(self.frame.columns) > max_width)
677
678    def _is_screen_short(self, max_height) -> bool:
679        return bool(self.max_rows == 0 and len(self.frame) > max_height)
680
681    def _get_number_of_auxillary_rows(self) -> int:
682        """Get number of rows occupied by prompt, dots and dimension info."""
683        dot_row = 1
684        prompt_row = 1
685        num_rows = dot_row + prompt_row
686
687        if self.show_dimensions:
688            num_rows += len(self.dimensions_info.splitlines())
689
690        if self.header:
691            num_rows += 1
692
693        return num_rows
694
695    def truncate(self) -> None:
696        """
697        Check whether the frame should be truncated. If so, slice the frame up.
698        """
699        if self.is_truncated_horizontally:
700            self._truncate_horizontally()
701
702        if self.is_truncated_vertically:
703            self._truncate_vertically()
704
705    def _truncate_horizontally(self) -> None:
706        """Remove columns, which are not to be displayed and adjust formatters.
707
708        Attributes affected:
709            - tr_frame
710            - formatters
711            - tr_col_num
712        """
713        assert self.max_cols_fitted is not None
714        col_num = self.max_cols_fitted // 2
715        if col_num >= 1:
716            left = self.tr_frame.iloc[:, :col_num]
717            right = self.tr_frame.iloc[:, -col_num:]
718            self.tr_frame = concat((left, right), axis=1)
719
720            # truncate formatter
721            if isinstance(self.formatters, (list, tuple)):
722                self.formatters = [
723                    *self.formatters[:col_num],
724                    *self.formatters[-col_num:],
725                ]
726        else:
727            col_num = cast(int, self.max_cols)
728            self.tr_frame = self.tr_frame.iloc[:, :col_num]
729        self.tr_col_num = col_num
730
731    def _truncate_vertically(self) -> None:
732        """Remove rows, which are not to be displayed.
733
734        Attributes affected:
735            - tr_frame
736            - tr_row_num
737        """
738        assert self.max_rows_fitted is not None
739        row_num = self.max_rows_fitted // 2
740        if row_num >= 1:
741            head = self.tr_frame.iloc[:row_num, :]
742            tail = self.tr_frame.iloc[-row_num:, :]
743            self.tr_frame = concat((head, tail))
744        else:
745            row_num = cast(int, self.max_rows)
746            self.tr_frame = self.tr_frame.iloc[:row_num, :]
747        self.tr_row_num = row_num
748
749    def _get_strcols_without_index(self) -> List[List[str]]:
750        strcols: List[List[str]] = []
751
752        if not is_list_like(self.header) and not self.header:
753            for i, c in enumerate(self.tr_frame):
754                fmt_values = self.format_col(i)
755                fmt_values = _make_fixed_width(
756                    strings=fmt_values,
757                    justify=self.justify,
758                    minimum=int(self.col_space.get(c, 0)),
759                    adj=self.adj,
760                )
761                strcols.append(fmt_values)
762            return strcols
763
764        if is_list_like(self.header):
765            # cast here since can't be bool if is_list_like
766            self.header = cast(List[str], self.header)
767            if len(self.header) != len(self.columns):
768                raise ValueError(
769                    f"Writing {len(self.columns)} cols "
770                    f"but got {len(self.header)} aliases"
771                )
772            str_columns = [[label] for label in self.header]
773        else:
774            str_columns = self._get_formatted_column_labels(self.tr_frame)
775
776        if self.show_row_idx_names:
777            for x in str_columns:
778                x.append("")
779
780        for i, c in enumerate(self.tr_frame):
781            cheader = str_columns[i]
782            header_colwidth = max(
783                int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader)
784            )
785            fmt_values = self.format_col(i)
786            fmt_values = _make_fixed_width(
787                fmt_values, self.justify, minimum=header_colwidth, adj=self.adj
788            )
789
790            max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth)
791            cheader = self.adj.justify(cheader, max_len, mode=self.justify)
792            strcols.append(cheader + fmt_values)
793
794        return strcols
795
796    def format_col(self, i: int) -> List[str]:
797        frame = self.tr_frame
798        formatter = self._get_formatter(i)
799        return format_array(
800            frame.iloc[:, i]._values,
801            formatter,
802            float_format=self.float_format,
803            na_rep=self.na_rep,
804            space=self.col_space.get(frame.columns[i]),
805            decimal=self.decimal,
806            leading_space=self.index,
807        )
808
809    def _get_formatter(self, i: Union[str, int]) -> Optional[Callable]:
810        if isinstance(self.formatters, (list, tuple)):
811            if is_integer(i):
812                i = cast(int, i)
813                return self.formatters[i]
814            else:
815                return None
816        else:
817            if is_integer(i) and i not in self.columns:
818                i = self.columns[i]
819            return self.formatters.get(i, None)
820
821    def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]:
822        from pandas.core.indexes.multi import sparsify_labels
823
824        columns = frame.columns
825
826        if isinstance(columns, MultiIndex):
827            fmt_columns = columns.format(sparsify=False, adjoin=False)
828            fmt_columns = list(zip(*fmt_columns))
829            dtypes = self.frame.dtypes._values
830
831            # if we have a Float level, they don't use leading space at all
832            restrict_formatting = any(level.is_floating for level in columns.levels)
833            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
834
835            def space_format(x, y):
836                if (
837                    y not in self.formatters
838                    and need_leadsp[x]
839                    and not restrict_formatting
840                ):
841                    return " " + y
842                return y
843
844            str_columns = list(
845                zip(*[[space_format(x, y) for y in x] for x in fmt_columns])
846            )
847            if self.sparsify and len(str_columns):
848                str_columns = sparsify_labels(str_columns)
849
850            str_columns = [list(x) for x in zip(*str_columns)]
851        else:
852            fmt_columns = columns.format()
853            dtypes = self.frame.dtypes
854            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
855            str_columns = [
856                [" " + x if not self._get_formatter(i) and need_leadsp[x] else x]
857                for i, (col, x) in enumerate(zip(columns, fmt_columns))
858            ]
859        # self.str_columns = str_columns
860        return str_columns
861
862    def _get_formatted_index(self, frame: "DataFrame") -> List[str]:
863        # Note: this is only used by to_string() and to_latex(), not by
864        # to_html(). so safe to cast col_space here.
865        col_space = {k: cast(int, v) for k, v in self.col_space.items()}
866        index = frame.index
867        columns = frame.columns
868        fmt = self._get_formatter("__index__")
869
870        if isinstance(index, MultiIndex):
871            fmt_index = index.format(
872                sparsify=self.sparsify,
873                adjoin=False,
874                names=self.show_row_idx_names,
875                formatter=fmt,
876            )
877        else:
878            fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)]
879
880        fmt_index = [
881            tuple(
882                _make_fixed_width(
883                    list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj
884                )
885            )
886            for x in fmt_index
887        ]
888
889        adjoined = self.adj.adjoin(1, *fmt_index).split("\n")
890
891        # empty space for columns
892        if self.show_col_idx_names:
893            col_header = [str(x) for x in self._get_column_name_list()]
894        else:
895            col_header = [""] * columns.nlevels
896
897        if self.header:
898            return col_header + adjoined
899        else:
900            return adjoined
901
902    def _get_column_name_list(self) -> List[str]:
903        names: List[str] = []
904        columns = self.frame.columns
905        if isinstance(columns, MultiIndex):
906            names.extend("" if name is None else name for name in columns.names)
907        else:
908            names.append("" if columns.name is None else columns.name)
909        return names
910
911
class DataFrameRenderer:
    """Render a formatted DataFrame into one of several output formats.

    Used from pandas.core.generic.NDFrame for:
        - to_csv
        - to_latex

    Used from pandas.core.frame.DataFrame for:
        - to_html
        - to_string

    Parameters
    ----------
    fmt : DataFrameFormatter
        Formatter holding the formatting options.
    """

    def __init__(self, fmt: DataFrameFormatter):
        self.fmt = fmt

    def to_latex(
        self,
        buf: Optional[FilePathOrBuffer[str]] = None,
        column_format: Optional[str] = None,
        longtable: bool = False,
        encoding: Optional[str] = None,
        multicolumn: bool = False,
        multicolumn_format: Optional[str] = None,
        multirow: bool = False,
        caption: Optional[str] = None,
        label: Optional[str] = None,
        position: Optional[str] = None,
    ) -> Optional[str]:
        """
        Render a DataFrame to a LaTeX tabular/longtable environment output.

        All LaTeX-specific options are handed to ``LatexFormatter``; the
        rendered text is then written with ``save_to_buffer`` using ``buf``
        and ``encoding``, whose return value is returned.
        """
        from pandas.io.formats.latex import LatexFormatter

        latex_options = dict(
            longtable=longtable,
            column_format=column_format,
            multicolumn=multicolumn,
            multicolumn_format=multicolumn_format,
            multirow=multirow,
            caption=caption,
            label=label,
            position=position,
        )
        rendered = LatexFormatter(self.fmt, **latex_options).to_string()
        return save_to_buffer(rendered, buf=buf, encoding=encoding)

    def to_html(
        self,
        buf: Optional[FilePathOrBuffer[str]] = None,
        encoding: Optional[str] = None,
        classes: Optional[Union[str, List, Tuple]] = None,
        notebook: bool = False,
        border: Optional[int] = None,
        table_id: Optional[str] = None,
        render_links: bool = False,
    ) -> Optional[str]:
        """
        Render a DataFrame to a html table.

        Parameters
        ----------
        buf : str, Path or StringIO-like, optional, default None
            Buffer to write to. If None, the output is returned as a string.
        encoding : str, default "utf-8"
            Set character encoding.
        classes : str or list-like
            Classes to include in the `class` attribute of the opening
            ``<table>`` tag, in addition to the default "dataframe".
        notebook : {True, False}, optional, default False
            Whether the generated HTML is for IPython Notebook. Selects
            ``NotebookFormatter`` instead of ``HTMLFormatter``.
        border : int
            A ``border=border`` attribute is included in the opening
            ``<table>`` tag. Default ``pd.options.display.html.border``.
        table_id : str, optional
            A css id is included in the opening `<table>` tag if specified.
        render_links : bool, default False
            Convert URLs to HTML links.
        """
        from pandas.io.formats.html import HTMLFormatter, NotebookFormatter

        formatter_cls = HTMLFormatter
        if notebook:
            formatter_cls = NotebookFormatter

        rendered = formatter_cls(
            self.fmt,
            classes=classes,
            border=border,
            table_id=table_id,
            render_links=render_links,
        ).to_string()
        return save_to_buffer(rendered, buf=buf, encoding=encoding)

    def to_string(
        self,
        buf: Optional[FilePathOrBuffer[str]] = None,
        encoding: Optional[str] = None,
        line_width: Optional[int] = None,
    ) -> Optional[str]:
        """
        Render a DataFrame to a console-friendly tabular output.

        Parameters
        ----------
        buf : str, Path or StringIO-like, optional, default None
            Buffer to write to. If None, the output is returned as a string.
        encoding : str, default "utf-8"
            Set character encoding.
        line_width : int, optional
            Width to wrap a line in characters.
        """
        from pandas.io.formats.string import StringFormatter

        rendered = StringFormatter(self.fmt, line_width=line_width).to_string()
        return save_to_buffer(rendered, buf=buf, encoding=encoding)

    def to_csv(
        self,
        path_or_buf: Optional[FilePathOrBuffer[str]] = None,
        encoding: Optional[str] = None,
        sep: str = ",",
        columns: Optional[Sequence[Label]] = None,
        index_label: Optional[IndexLabel] = None,
        mode: str = "w",
        compression: CompressionOptions = "infer",
        quoting: Optional[int] = None,
        quotechar: str = '"',
        line_terminator: Optional[str] = None,
        chunksize: Optional[int] = None,
        date_format: Optional[str] = None,
        doublequote: bool = True,
        escapechar: Optional[str] = None,
        errors: str = "strict",
        storage_options: StorageOptions = None,
    ) -> Optional[str]:
        """
        Render dataframe as comma-separated file.

        The work is delegated to ``CSVFormatter``. When ``path_or_buf`` is
        None, an in-memory ``StringIO`` is written to and its contents are
        returned as a string; otherwise None is returned.
        """
        from pandas.io.formats.csvs import CSVFormatter

        # remember whether the target is ours, so we know to hand back its text
        created_buffer = path_or_buf is None
        if created_buffer:
            path_or_buf = StringIO()

        CSVFormatter(
            path_or_buf=path_or_buf,
            line_terminator=line_terminator,
            sep=sep,
            encoding=encoding,
            errors=errors,
            compression=compression,
            quoting=quoting,
            cols=columns,
            index_label=index_label,
            mode=mode,
            chunksize=chunksize,
            quotechar=quotechar,
            date_format=date_format,
            doublequote=doublequote,
            escapechar=escapechar,
            storage_options=storage_options,
            formatter=self.fmt,
        ).save()

        if not created_buffer:
            return None

        assert isinstance(path_or_buf, StringIO)
        content = path_or_buf.getvalue()
        path_or_buf.close()
        return content
1092
1093
def save_to_buffer(
    string: str,
    buf: Optional[FilePathOrBuffer[str]] = None,
    encoding: Optional[str] = None,
) -> Optional[str]:
    """
    Write ``string`` to ``buf``, or hand it back when no target is given.

    The target is obtained from ``get_buffer(buf, encoding=encoding)``. If
    ``buf`` is None the written contents are returned as a string, otherwise
    None is returned.
    """
    with get_buffer(buf, encoding=encoding) as handle:
        handle.write(string)
        return handle.getvalue() if buf is None else None
1107
1108
@contextmanager
def get_buffer(buf: Optional[FilePathOrBuffer[str]], encoding: Optional[str] = None):
    """
    Context manager yielding a writable text buffer for ``buf``.

    * ``buf`` is None: a fresh ``StringIO`` is yielded.
    * ``buf`` has a ``write`` attribute: it is yielded unchanged.
    * ``buf`` resolves to a ``str`` via ``stringify_path``: the file is
      opened for writing, yielded, and closed on exit.

    ``encoding`` defaults to "utf-8" and may only be given together with a
    file name.

    Raises
    ------
    ValueError
        If ``encoding`` is specified and the target is not a file name.
    TypeError
        If the target is neither a file name nor an object with ``write``.
    """
    target = StringIO() if buf is None else stringify_path(buf)
    is_file_name = isinstance(target, str)

    if encoding is None:
        encoding = "utf-8"
    elif not is_file_name:
        raise ValueError("buf is not a file name and encoding is specified.")

    if hasattr(target, "write"):
        yield target
    elif is_file_name:
        # GH#30034: the builtin open (rather than codecs.open) prevents a
        #  file leak if we have an invalid encoding argument.
        # newline="" is needed to roundtrip correctly on
        #  windows test_to_latex_filename
        with open(target, "w", encoding=encoding, newline="") as handle:
            yield handle
    else:
        raise TypeError("buf is not a file name and it has no write method")
1136
1137
1138# ----------------------------------------------------------------------
1139# Array formatters
1140
1141
def format_array(
    values: Any,
    formatter: Optional[Callable],
    float_format: Optional[FloatFormatType] = None,
    na_rep: str = "NaN",
    digits: Optional[int] = None,
    space: Optional[Union[str, int]] = None,
    justify: str = "right",
    decimal: str = ".",
    leading_space: Optional[bool] = True,
    quoting: Optional[int] = None,
) -> List[str]:
    """
    Format an array for printing.

    A formatter class is chosen from ``values.dtype`` and instantiated with
    the given options; its ``get_result()`` is returned.

    Parameters
    ----------
    values
        Array-like exposing a ``dtype`` attribute.
    formatter
        Passed through to the formatter class.
    float_format
        Defaults to the ``display.float_format`` option when None.
    na_rep
        Passed through to the formatter class.
    digits
        Defaults to the ``display.precision`` option when None.
    space
        Defaults to the ``display.column_space`` option when None.
    justify
        Passed through to the formatter class.
    decimal
        Passed through to the formatter class.
    leading_space : bool, optional, default True
        Whether the array should be formatted with a leading space.
        When an array as a column of a Series or DataFrame, we do want
        the leading space to pad between columns.

        When formatting an Index subclass
        (e.g. IntervalIndex._format_native_types), we don't want the
        leading space since it should be left-aligned.
    quoting
        Passed through to the formatter class.

    Returns
    -------
    List[str]
    """
    # Ordered (dtype predicate, formatter class) pairs; the first match wins,
    # so the order is significant.
    dtype_dispatch = (
        (is_datetime64_dtype, Datetime64Formatter),
        (is_datetime64tz_dtype, Datetime64TZFormatter),
        (is_timedelta64_dtype, Timedelta64Formatter),
        (is_extension_array_dtype, ExtensionArrayFormatter),
        (is_float_dtype, FloatArrayFormatter),
        (is_complex_dtype, FloatArrayFormatter),
        (is_integer_dtype, IntArrayFormatter),
    )

    fmt_klass: Type[GenericArrayFormatter] = GenericArrayFormatter
    for dtype_matches, klass in dtype_dispatch:
        if dtype_matches(values.dtype):
            fmt_klass = klass
            break

    # fall back to the display options for anything left unspecified
    space = get_option("display.column_space") if space is None else space
    float_format = (
        get_option("display.float_format") if float_format is None else float_format
    )
    digits = get_option("display.precision") if digits is None else digits

    return fmt_klass(
        values,
        digits=digits,
        na_rep=na_rep,
        float_format=float_format,
        formatter=formatter,
        space=space,
        justify=justify,
        decimal=decimal,
        leading_space=leading_space,
        quoting=quoting,
    ).get_result()
1219
1220
class GenericArrayFormatter:
    """
    Base array formatter; ``format_array`` uses it when no dtype-specific
    formatter class applies.

    The constructor only stores its arguments. Subclasses customise the
    output by overriding ``_format_strings``.
    """

    def __init__(
        self,
        values: Any,
        digits: int = 7,
        formatter: Optional[Callable] = None,
        na_rep: str = "NaN",
        space: Union[str, int] = 12,
        float_format: Optional[FloatFormatType] = None,
        justify: str = "right",
        decimal: str = ".",
        quoting: Optional[int] = None,
        fixed_width: bool = True,
        leading_space: Optional[bool] = True,
    ):
        # what to format
        self.values = values
        # how individual elements are rendered
        self.formatter = formatter
        self.float_format = float_format
        self.digits = digits
        self.decimal = decimal
        self.na_rep = na_rep
        self.quoting = quoting
        # how the rendered strings are laid out
        self.space = space
        self.justify = justify
        self.fixed_width = fixed_width
        self.leading_space = leading_space

    def get_result(self) -> List[str]:
        """Format the values and justify the strings via ``_make_fixed_width``."""
        return _make_fixed_width(self._format_strings(), self.justify)

    def _format_strings(self) -> List[str]:
        """
        Return one formatted string per element of ``self.values``.

        Elements that are non-missing floats go through the float format;
        everything else goes through the element formatter (or a missing-value
        representation) and is prefixed according to ``leading_space``.
        """
        # float formatter: explicit > display option > precision-based default
        float_format = self.float_format
        if float_format is None:
            float_format = get_option("display.float_format")
        if float_format is None:
            precision = get_option("display.precision")
            float_format = lambda x: _trim_zeros_single_float(
                f"{x: .{precision:d}f}"
            )

        # element formatter: explicit, else pprint_thing with escaping
        formatter = self.formatter
        if formatter is None:
            quote_strings = not (self.quoting is None or self.quoting == QUOTE_NONE)
            formatter = partial(
                pprint_thing,
                escape_chars=("\t", "\r", "\n"),
                quote_strings=quote_strings,
            )

        def _format(x):
            is_missing = self.na_rep is not None and is_scalar(x) and isna(x)
            if not is_missing:
                if isinstance(x, PandasObject):
                    return str(x)
                # object dtype
                return str(formatter(x))

            try:
                # try block for np.isnat specifically
                # determine na_rep if x is None or NaT-like
                if x is None:
                    return "None"
                if x is NA:
                    return str(NA)
                if x is NaT or np.isnat(x):
                    return "NaT"
            except (TypeError, ValueError):
                # np.isnat only handles datetime or timedelta objects
                pass
            return self.na_rep

        vals = extract_array(self.values, extract_numpy=True)

        is_float_type = (
            lib.map_infer(vals, is_float)
            # vals may have 2 or more dimensions
            & np.all(notna(vals), axis=tuple(range(1, len(vals.shape))))
        )
        leading_space = self.leading_space
        if leading_space is None:
            leading_space = is_float_type.any()

        # Only the literal ``False`` drops the pad for non-float elements;
        # any other value (truthy or not) keeps a single leading space.
        pad = "" if leading_space is False else " "

        return [
            float_format(v) if is_flt else f"{pad}{_format(v)}"
            for is_flt, v in zip(is_float_type, vals)
        ]
1321
1322
class FloatArrayFormatter(GenericArrayFormatter):
    """
    Array formatter that ``format_array`` selects for float and complex dtypes.

    Values are rendered either by a user supplied callable, by a ``%``-style
    format string, or (in fixed-width mode) by a fixed/scientific notation
    chosen from the data.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # float_format is expected to be a string
        # formatter should be used to pass a function
        if self.float_format is not None and self.formatter is None:
            # GH21625, GH22270
            # an explicit float_format switches off fixed-width handling
            self.fixed_width = False
            if callable(self.float_format):
                # a callable float_format is treated as the element formatter
                self.formatter = self.float_format
                self.float_format = None

    def _value_formatter(
        self,
        float_format: Optional[FloatFormatType] = None,
        threshold: Optional[Union[float, int]] = None,
    ) -> Callable:
        """
        Return a function to be applied on each value to format it.

        Parameters
        ----------
        float_format : optional
            Callable invoked as ``float_format(value=v)``; falls back to
            ``self.float_format`` when None. If still falsy, ``str`` is used.
        threshold : float or int, optional
            When given, values whose absolute value is not greater than the
            threshold are formatted as ``0.0``.

        Returns
        -------
        Callable
            Maps a single value to its string; missing values map to
            ``self.na_rep``.
        """
        # the float_format parameter supersedes self.float_format
        if float_format is None:
            float_format = self.float_format

        # we are going to compose different functions, to first convert to
        # a string, then replace the decimal symbol, and finally chop according
        # to the threshold

        # when there is no float_format, we use str instead of '%g'
        # because str(0.0) = '0.0' while '%g' % 0.0 = '0'
        if float_format:

            def base_formatter(v):
                assert float_format is not None  # for mypy
                # pandas\io\formats\format.py:1411: error: "str" not callable
                # [operator]

                # pandas\io\formats\format.py:1411: error: Unexpected keyword
                # argument "value" for "__call__" of "EngFormatter"  [call-arg]
                return (
                    float_format(value=v)  # type: ignore[operator,call-arg]
                    if notna(v)
                    else self.na_rep
                )

        else:

            def base_formatter(v):
                return str(v) if notna(v) else self.na_rep

        if self.decimal != ".":

            # swap only the first "." for the configured decimal symbol
            def decimal_formatter(v):
                return base_formatter(v).replace(".", self.decimal, 1)

        else:
            decimal_formatter = base_formatter

        if threshold is None:
            return decimal_formatter

        # chop: magnitudes at or below the threshold are displayed as zero
        def formatter(value):
            if notna(value):
                if abs(value) > threshold:
                    return decimal_formatter(value)
                else:
                    return decimal_formatter(0.0)
            else:
                return self.na_rep

        return formatter

    def get_result_as_array(self) -> np.ndarray:
        """
        Returns the float values converted into strings using
        the parameters given at initialisation, as a numpy array.

        If ``self.formatter`` is set it is applied to every non-missing value.
        Otherwise, without fixed width the values are formatted once with
        ``self.float_format`` (a ``%``-style string) or ``str``. With fixed
        width a fixed-point format using ``self.digits`` decimals is tried
        first, and replaced by a scientific format when some non-zero values
        are smaller than ``10 ** -digits`` or when the output is too long and
        some values exceed ``1e6``.
        """

        # apply ``formatter`` element-wise, substituting ``na_rep`` for
        # missing entries, and keep the shape of ``values``
        def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
            mask = isna(values)
            formatted = np.array(
                [
                    formatter(val) if not m else na_rep
                    for val, m in zip(values.ravel(), mask.ravel())
                ]
            ).reshape(values.shape)
            return formatted

        if self.formatter is not None:
            return format_with_na_rep(self.values, self.formatter, self.na_rep)

        # the chop threshold is only honoured in fixed-width mode
        if self.fixed_width:
            threshold = get_option("display.chop_threshold")
        else:
            threshold = None

        # if we have a fixed_width, we'll need to try different float_format
        def format_values_with(float_format):
            formatter = self._value_formatter(float_format, threshold)

            # default formatter leaves a space to the left when formatting
            # floats, must be consistent for left-justifying NaNs (GH #25061)
            if self.justify == "left":
                na_rep = " " + self.na_rep
            else:
                na_rep = self.na_rep

            # separate the wheat from the chaff
            values = self.values
            is_complex = is_complex_dtype(values)
            values = format_with_na_rep(values, formatter, na_rep)

            if self.fixed_width:
                # trailing zeros are trimmed by the dtype-appropriate helper
                if is_complex:
                    result = _trim_zeros_complex(values, self.decimal)
                else:
                    result = _trim_zeros_float(values, self.decimal)
                return np.asarray(result, dtype="object")

            return values

        # There is a special default string when we are fixed-width
        # The default is otherwise to use str instead of a formatting string
        float_format: Optional[FloatFormatType]
        if self.float_format is None:
            if self.fixed_width:
                # the " " flag reserves a sign column for non-negative values
                if self.leading_space is True:
                    fmt_str = "{value: .{digits:d}f}"
                else:
                    fmt_str = "{value:.{digits:d}f}"
                float_format = partial(fmt_str.format, digits=self.digits)
            else:
                float_format = self.float_format
        else:
            # a string float_format is applied with the % operator
            float_format = lambda value: self.float_format % value

        formatted_values = format_values_with(float_format)

        if not self.fixed_width:
            return formatted_values

        # we need do convert to engineering format if some values are too small
        # and would appear as 0, or if some values are too big and take too
        # much space

        if len(formatted_values) > 0:
            maxlen = max(len(x) for x in formatted_values)
            too_long = maxlen > self.digits + 6
        else:
            too_long = False

        # invalid="ignore" silences warnings from comparisons involving NaN
        with np.errstate(invalid="ignore"):
            abs_vals = np.abs(self.values)
            # this is pretty arbitrary for now
            # large values: more that 8 characters including decimal symbol
            # and first digit, hence > 1e6
            has_large_values = (abs_vals > 1e6).any()
            has_small_values = (
                (abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)
            ).any()

        if has_small_values or (too_long and has_large_values):
            # reformat everything in scientific notation
            if self.leading_space is True:
                fmt_str = "{value: .{digits:d}e}"
            else:
                fmt_str = "{value:.{digits:d}e}"
            float_format = partial(fmt_str.format, digits=self.digits)
            formatted_values = format_values_with(float_format)

        return formatted_values

    def _format_strings(self) -> List[str]:
        """Return the result of ``get_result_as_array`` as a list."""
        return list(self.get_result_as_array())
1495
1496
class IntArrayFormatter(GenericArrayFormatter):
    """Array formatter that ``format_array`` selects for integer dtypes."""

    def _format_strings(self) -> List[str]:
        """
        Return one string per element of ``self.values``.

        ``self.formatter`` is used when it is truthy. Otherwise values are
        rendered with the ``d`` format; unless ``leading_space`` is literally
        ``False``, the ``" "`` flag reserves a sign column for non-negative
        numbers.
        """
        # The f-string already yields the final text, so no further
        # ``str.format`` call is applied to it.
        if self.leading_space is False:
            formatter_str = lambda x: f"{x:d}"
        else:
            formatter_str = lambda x: f"{x: d}"
        formatter = self.formatter or formatter_str
        return [formatter(x) for x in self.values]
1506
1507
class Datetime64Formatter(GenericArrayFormatter):
    """
    Array formatter that ``format_array`` selects for timezone-naive
    datetime64 dtypes.

    Parameters
    ----------
    values : np.ndarray, Series, DatetimeIndex or DatetimeArray
        The datetime values to format.
    nat_rep : str, default "NaT"
        Representation passed as ``na_rep`` to the native formatting.
    date_format : str, optional
        Format passed as ``date_format`` to the native formatting.
    **kwargs
        Forwarded to ``GenericArrayFormatter.__init__``.
    """

    def __init__(
        self,
        values: Union[np.ndarray, "Series", DatetimeIndex, DatetimeArray],
        nat_rep: str = "NaT",
        date_format: Optional[str] = None,
        **kwargs,
    ):
        super().__init__(values, **kwargs)
        self.nat_rep = nat_rep
        self.date_format = date_format

    def _format_strings(self) -> List[str]:
        """
        Return the formatted values; by definition they do NOT have a TZ.

        A callable ``self.formatter`` is applied element-wise. Otherwise the
        formatting is delegated to ``_format_native_types`` of the underlying
        array with ``nat_rep`` and ``date_format``.
        """
        values = self.values

        if not isinstance(values, DatetimeIndex):
            values = DatetimeIndex(values)

        if self.formatter is not None and callable(self.formatter):
            return [self.formatter(x) for x in values]

        fmt_values = values._data._format_native_types(
            na_rep=self.nat_rep, date_format=self.date_format
        )
        return fmt_values.tolist()
1534
1535
class ExtensionArrayFormatter(GenericArrayFormatter):
    """Array formatter that ``format_array`` selects for extension dtypes."""

    def _format_strings(self) -> List[str]:
        """
        Convert the extension array and delegate back to ``format_array``.

        When no formatter was supplied, the array's own ``_formatter(boxed=True)``
        is used. All remaining options are forwarded unchanged.
        """
        values = extract_array(self.values, extract_numpy=True)

        formatter = self.formatter
        if formatter is None:
            formatter = values._formatter(boxed=True)

        # Categorical is special for now, so that we can preserve tzinfo
        array = (
            values._internal_get_values()
            if is_categorical_dtype(values.dtype)
            else np.asarray(values)
        )

        forwarded = dict(
            float_format=self.float_format,
            na_rep=self.na_rep,
            digits=self.digits,
            space=self.space,
            justify=self.justify,
            decimal=self.decimal,
            leading_space=self.leading_space,
            quoting=self.quoting,
        )
        return format_array(array, formatter, **forwarded)
1563
1564
def format_percentiles(
    percentiles: Union[
        np.ndarray, List[Union[int, float]], List[float], List[Union[str, float]]
    ]
) -> List[str]:
    """
    Outputs rounded and formatted percentiles.

    Parameters
    ----------
    percentiles : list-like, containing floats from interval [0,1]

    Returns
    -------
    formatted : list of strings

    Raises
    ------
    ValueError
        If ``percentiles`` is not numeric or contains values outside [0, 1]
        (this includes NaN).

    Notes
    -----
    Rounding precision is chosen so that: (1) if any two elements of
    ``percentiles`` differ, they remain different after rounding
    (2) no entry is *rounded* to 0% or 100%.
    Any non-integer is always rounded to at least 1 decimal place.

    A scaled percentile counts as an integer when it is close (in the sense
    of ``np.isclose``) to its nearest integer, so binary floating point
    noise such as ``0.29 * 100 == 28.999999999999996`` is shown as ``29%``.

    Examples
    --------
    Keeps all entries different after rounding:

    >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
    ['1.999%', '2.001%', '50%', '66.667%', '99.99%']

    No element is rounded to 0% or 100% (unless already equal to it).
    Duplicates are allowed:

    >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
    ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']

    Floating point noise does not turn an integer percentile into a decimal:

    >>> format_percentiles([0.28, 0.29, 0.57, 0.58])
    ['28%', '29%', '57%', '58%']
    """
    percentiles = np.asarray(percentiles)

    # It checks for np.NaN as well
    with np.errstate(invalid="ignore"):
        if (
            not is_numeric_dtype(percentiles)
            or not np.all(percentiles >= 0)
            or not np.all(percentiles <= 1)
        ):
            raise ValueError("percentiles should all be in the interval [0,1]")

    percentiles = 100 * percentiles

    # Compare against the *nearest* integer. Truncating with astype(int)
    # alone would miss values that land just below an integer after scaling.
    nearest_int = percentiles.round().astype(int)
    int_idx = np.isclose(nearest_int, percentiles)

    if np.all(int_idx):
        return [i + "%" for i in nearest_int.astype(str)]

    unique_pcts = np.unique(percentiles)
    # also keep the distance to 0 and to 100, so nothing is rounded onto them
    to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None
    to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None

    # Least precision that keeps percentiles unique after rounding
    prec = -np.floor(
        np.log10(np.min(np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end)))
    ).astype(int)
    prec = max(1, prec)
    out = np.empty_like(percentiles, dtype=object)
    out[int_idx] = nearest_int[int_idx].astype(str)
    out[~int_idx] = percentiles[~int_idx].round(prec).astype(str)
    return [i + "%" for i in out]
1632
1633
def is_dates_only(
    values: Union[np.ndarray, DatetimeArray, Index, DatetimeIndex]
) -> bool:
    """
    Return True if ``values`` hold only dates and carry no timezone.

    Inputs that are not an ``Index`` are flattened with ``ravel`` first. A
    timezone-aware input always yields False; otherwise every non-NaT value
    must be a whole multiple of one day in its integer (``asi8``) form.
    """
    if not isinstance(values, Index):
        values = values.ravel()

    dt_index = DatetimeIndex(values)
    if dt_index.tz is not None:
        return False

    nanos_per_day = int(86400 * 1e9)
    stamps = dt_index.asi8
    # NaT entries are ignored; anything else with a remainder has a time part
    has_time_part = np.logical_and(stamps != iNaT, stamps % nanos_per_day != 0)
    return bool(has_time_part.sum() == 0)
1654
1655
1656def _format_datetime64(x: Union[NaTType, Timestamp], nat_rep: str = "NaT") -> str:
1657    if x is NaT:
1658        return nat_rep
1659
1660    return str(x)
1661
1662
1663def _format_datetime64_dateonly(
1664    x: Union[NaTType, Timestamp],
1665    nat_rep: str = "NaT",
1666    date_format: Optional[str] = None,
1667) -> str:
1668    if x is NaT:
1669        return nat_rep
1670
1671    if date_format:
1672        return x.strftime(date_format)
1673    else:
1674        return x._date_repr
1675
1676
def get_format_datetime64(
    is_dates_only: bool, nat_rep: str = "NaT", date_format: Optional[str] = None
) -> Callable:
    """
    Return a one-argument formatter for datetime values.

    For date-only data the formatter wraps ``_format_datetime64_dateonly``
    (honouring ``date_format``); otherwise it wraps ``_format_datetime64``.
    ``nat_rep`` is bound in both cases.
    """

    def _date_formatter(x):
        return _format_datetime64_dateonly(
            x, nat_rep=nat_rep, date_format=date_format
        )

    def _datetime_formatter(x):
        return _format_datetime64(x, nat_rep=nat_rep)

    return _date_formatter if is_dates_only else _datetime_formatter
1687
1688
def get_format_datetime64_from_values(
    values: Union[np.ndarray, DatetimeArray, DatetimeIndex], date_format: Optional[str]
) -> Optional[str]:
    """
    Given values and a date_format, return a string format.

    When ``is_dates_only(values)`` holds and no (truthy) ``date_format`` was
    given, "%Y-%m-%d" is returned; in every other case ``date_format`` is
    returned as passed in.
    """
    needs_flattening = isinstance(values, np.ndarray) and values.ndim > 1
    if needs_flattening:
        # We don't actually care about the order of values, and DatetimeIndex
        #  only accepts 1D values
        values = values.ravel()

    if not is_dates_only(values):
        return date_format
    return date_format or "%Y-%m-%d"
1702
1703
class Datetime64TZFormatter(Datetime64Formatter):
    """Array formatter that ``format_array`` selects for datetime64tz dtypes."""

    def _format_strings(self) -> List[str]:
        """
        Return the formatted values; by definition they have a TZ.

        The values are converted to objects and formatted one by one, using
        ``self.formatter`` when it is truthy and otherwise the function built
        by ``get_format_datetime64``.
        """
        boxed = self.values.astype(object)
        dates_only = is_dates_only(boxed)
        formatter = self.formatter or get_format_datetime64(
            dates_only, date_format=self.date_format
        )
        return [formatter(item) for item in boxed]
1715
1716
class Timedelta64Formatter(GenericArrayFormatter):
    """
    Array formatter that ``format_array`` selects for timedelta64 dtypes.

    Parameters
    ----------
    values : np.ndarray or TimedeltaIndex
        The timedelta values to format.
    nat_rep : str, default "NaT"
        Passed to ``get_format_timedelta64``.
    box : bool, default False
        Passed to ``get_format_timedelta64``.
    **kwargs
        Forwarded to ``GenericArrayFormatter.__init__``.
    """

    def __init__(
        self,
        values: Union[np.ndarray, TimedeltaIndex],
        nat_rep: str = "NaT",
        box: bool = False,
        **kwargs,
    ):
        super().__init__(values, **kwargs)
        self.box = box
        self.nat_rep = nat_rep

    def _format_strings(self) -> List[str]:
        """
        Format every value with ``self.formatter`` when it is truthy, else
        with the function returned by ``get_format_timedelta64``.
        """
        element_formatter = self.formatter or get_format_timedelta64(
            self.values, nat_rep=self.nat_rep, box=self.box
        )
        return [element_formatter(item) for item in self.values]
1734
1735
def get_format_timedelta64(
    values: Union[np.ndarray, TimedeltaIndex, TimedeltaArray],
    nat_rep: str = "NaT",
    box: bool = False,
) -> Callable:
    """
    Return a formatter function for a range of timedeltas.

    All values share the same format argument: the default representation
    when every non-NaT value is a whole number of days, ``"long"`` otherwise.

    Parameters
    ----------
    values : np.ndarray, TimedeltaIndex or TimedeltaArray
        Timedeltas used to pick the format; must support
        ``astype(np.int64)``.
    nat_rep : str, default "NaT"
        Text returned by the formatter for ``None`` and missing scalars.
    box : bool, default False
        If True, the formatted value is wrapped in single quotes.

    Returns
    -------
    Callable
        One-argument function mapping a timedelta-like value to ``str``.
    """
    values_int = values.astype(np.int64)

    consider_values = values_int != iNaT

    # Deliberately an int: taking the modulus against the float
    # ``86400 * 1e9`` would cast the int64 values to float64, which cannot
    # represent every integer above 2**53 exactly, so small sub-day
    # remainders on large timedeltas would be lost.
    one_day_nanos = 86400 * 10 ** 9
    has_partial_day = np.logical_and(
        consider_values, values_int % one_day_nanos != 0
    )
    even_days = has_partial_day.sum() == 0

    repr_format = None if even_days else "long"

    def _formatter(x):
        if x is None or (is_scalar(x) and isna(x)):
            return nat_rep

        if not isinstance(x, Timedelta):
            x = Timedelta(x)
        result = x._repr_base(format=repr_format)
        if box:
            result = f"'{result}'"
        return result

    return _formatter
1773
1774
def _make_fixed_width(
    strings: List[str],
    justify: str = "right",
    minimum: Optional[int] = None,
    adj: Optional[TextAdjustment] = None,
) -> List[str]:
    """
    Justify a list of strings to one common width.

    Parameters
    ----------
    strings : list of str
        The strings to justify. Returned unchanged when empty or when
        ``justify`` is "all".
    justify : str, default "right"
        Mode passed on to the adjustment object's ``justify``.
    minimum : int, optional
        Lower bound for the common width.
    adj : TextAdjustment, optional
        Object used to measure and justify text; ``get_adjustment()`` is
        used when omitted.

    Returns
    -------
    list of str
    """
    if len(strings) == 0 or justify == "all":
        return strings

    adjustment = get_adjustment() if adj is None else adj

    # widest entry, raised to ``minimum`` and capped by display.max_colwidth
    width = max(adjustment.len(s) for s in strings)
    if minimum is not None:
        width = max(minimum, width)

    col_limit = get_option("display.max_colwidth")
    if col_limit is not None and width > col_limit:
        width = col_limit

    # an ellipsis only fits when the configured limit exceeds three characters
    may_truncate = col_limit is not None and col_limit > 3

    clipped = [
        s[: width - 3] + "..." if may_truncate and adjustment.len(s) > width else s
        for s in strings
    ]
    return adjustment.justify(clipped, width, mode=justify)
1808
1809
1810def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> List[str]:
1811    """
1812    Separates the real and imaginary parts from the complex number, and
1813    executes the _trim_zeros_float method on each of those.
1814    """
1815    trimmed = [
1816        "".join(_trim_zeros_float(re.split(r"([j+-])", x), decimal))
1817        for x in str_complexes
1818    ]
1819
1820    # pad strings to the length of the longest trimmed string for alignment
1821    lengths = [len(s) for s in trimmed]
1822    max_length = max(lengths)
1823    padded = [
1824        s[: -((k - 1) // 2 + 1)]  # real part
1825        + (max_length - k) // 2 * "0"
1826        + s[-((k - 1) // 2 + 1) : -((k - 1) // 2)]  # + / -
1827        + s[-((k - 1) // 2) : -1]  # imaginary part
1828        + (max_length - k) // 2 * "0"
1829        + s[-1]
1830        for s, k in zip(trimmed, lengths)
1831    ]
1832    return padded
1833
1834
1835def _trim_zeros_single_float(str_float: str) -> str:
1836    """
1837    Trims trailing zeros after a decimal point,
1838    leaving just one if necessary.
1839    """
1840    str_float = str_float.rstrip("0")
1841    if str_float.endswith("."):
1842        str_float += "0"
1843
1844    return str_float
1845
1846
1847def _trim_zeros_float(
1848    str_floats: Union[np.ndarray, List[str]], decimal: str = "."
1849) -> List[str]:
1850    """
1851    Trims the maximum number of trailing zeros equally from
1852    all numbers containing decimals, leaving just one if
1853    necessary.
1854    """
1855    trimmed = str_floats
1856    number_regex = re.compile(fr"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$")
1857
1858    def is_number_with_decimal(x):
1859        return re.match(number_regex, x) is not None
1860
1861    def should_trim(values: Union[np.ndarray, List[str]]) -> bool:
1862        """
1863        Determine if an array of strings should be trimmed.
1864
1865        Returns True if all numbers containing decimals (defined by the
1866        above regular expression) within the array end in a zero, otherwise
1867        returns False.
1868        """
1869        numbers = [x for x in values if is_number_with_decimal(x)]
1870        return len(numbers) > 0 and all(x.endswith("0") for x in numbers)
1871
1872    while should_trim(trimmed):
1873        trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed]
1874
1875    # leave one 0 after the decimal points if need be.
1876    result = [
1877        x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x
1878        for x in trimmed
1879    ]
1880    return result
1881
1882
1883def _has_names(index: Index) -> bool:
1884    if isinstance(index, MultiIndex):
1885        return com.any_not_none(*index.names)
1886    else:
1887        return index.name is not None
1888
1889
class EngFormatter:
    """
    Formats float values according to engineering format.

    Based on matplotlib.ticker.EngFormatter

    Parameters
    ----------
    accuracy : int, optional
        Number of digits after the decimal point. When None the mantissa is
        rendered with the general ("g") format instead.
    use_eng_prefix : bool, default False
        If True append the SI prefix letter for the power of 1000,
        otherwise append an explicit exponent such as ``E+03``.
    """

    # The SI engineering prefixes
    ENG_PREFIXES = {
        -24: "y",
        -21: "z",
        -18: "a",
        -15: "f",
        -12: "p",
        -9: "n",
        -6: "u",
        -3: "m",
        0: "",
        3: "k",
        6: "M",
        9: "G",
        12: "T",
        15: "P",
        18: "E",
        21: "Z",
        24: "Y",
    }

    def __init__(self, accuracy: Optional[int] = None, use_eng_prefix: bool = False):
        self.accuracy = accuracy
        self.use_eng_prefix = use_eng_prefix

    def __call__(self, num: Union[int, float]) -> str:
        """
        Formats a number in engineering notation, appending a letter
        representing the power of 1000 of the original number. Some examples:

        >>> EngFormatter(accuracy=0, use_eng_prefix=True)(0)
        ' 0'

        >>> EngFormatter(accuracy=1, use_eng_prefix=True)(1000000)
        ' 1.0M'

        >>> EngFormatter(accuracy=2, use_eng_prefix=False)("-1e-6")
        '-1.00E-06'

        Parameters
        ----------
        num : int, float or str
            The value to represent: either a numeric value or a string that
            can be converted to a numeric value (as per the decimal.Decimal
            constructor).

        Returns
        -------
        str
            Engineering formatted string. NaN is returned as "NaN" and
            infinities as "inf" / "-inf".
        """
        dnum = decimal.Decimal(str(num))

        if dnum.is_nan():
            return "NaN"

        if dnum.is_infinite():
            # keep the sign: negative infinity must not be shown as "inf"
            return "-inf" if dnum < 0 else "inf"

        sign = 1

        if dnum < 0:  # pragma: no cover
            sign = -1
            dnum = -dnum

        # largest multiple of three not exceeding the decimal exponent
        if dnum != 0:
            pow10 = decimal.Decimal(int(math.floor(dnum.log10() / 3) * 3))
        else:
            pow10 = decimal.Decimal(0)

        # clamp to the range of exponents a prefix exists for
        pow10 = pow10.min(max(self.ENG_PREFIXES))
        pow10 = pow10.max(min(self.ENG_PREFIXES))
        int_pow10 = int(pow10)

        if self.use_eng_prefix:
            prefix = self.ENG_PREFIXES[int_pow10]
        else:
            if int_pow10 < 0:
                prefix = f"E-{-int_pow10:02d}"
            else:
                prefix = f"E+{int_pow10:02d}"

        mant = sign * dnum / (10 ** pow10)

        # the space in the format spec reserves a column for the sign
        if self.accuracy is None:  # pragma: no cover
            format_str = "{mant: g}{prefix}"
        else:
            format_str = f"{{mant: .{self.accuracy:d}f}}{{prefix}}"

        formatted = format_str.format(mant=mant, prefix=prefix)

        return formatted
1985
1986
def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None:
    """
    Switch the float display of DataFrames to engineering format.

    Installs an EngFormatter as the "display.float_format" option and sets
    "display.column_space" to ``max(12, accuracy + 9)``.

    Parameters
    ----------
    accuracy : int, default 3
        Number of decimal digits after the floating point.
    use_eng_prefix : bool, default False
        Passed on to EngFormatter.

    See also EngFormatter.
    """
    eng_formatter = EngFormatter(accuracy, use_eng_prefix)
    set_option("display.float_format", eng_formatter)

    column_space = max(12, accuracy + 9)
    set_option("display.column_space", column_space)
1997
1998
def get_level_lengths(
    levels: Any, sentinel: Union[bool, object, str] = ""
) -> List[Dict[int, int]]:
    """
    Compute, for every level, the length of each run of index entries.

    Parameters
    ----------
    levels : list of lists
        The values of each level.
    sentinel : string, optional
        Value marking a position at which no new entry starts.

    Returns
    -------
    list of dict
        One mapping per level; each key is the row position at which an
        entry starts and the value is the number of rows it covers.
    """
    if len(levels) == 0:
        return []

    # still_blank[pos] stays True only while every level processed so far
    # held the sentinel at ``pos``; once cleared, a sentinel at that position
    # in a later level counts as the start of a new entry.
    still_blank = [True] * len(levels[0])

    spans_per_level = []
    for level in levels:
        spans = {}
        start = 0
        for pos, label in enumerate(level):
            if still_blank[pos] and label == sentinel:
                continue
            still_blank[pos] = False
            # close the run opened at ``start`` and open a new one here
            spans[start] = pos - start
            start = pos

        # the last run extends to the end of the level
        spans[start] = len(level) - start
        spans_per_level.append(spans)

    return spans_per_level
2040
2041
def buffer_put_lines(buf: IO[str], lines: List[str]) -> None:
    """
    Appends lines to a buffer.

    The lines are joined with newlines and written in a single call; no
    trailing newline is added.

    Parameters
    ----------
    buf
        The buffer to write to
    lines
        The lines to append. Entries that are not already strings are
        converted with ``str``.
    """
    # Always stringify: converting only when some entry already is a str
    # left a list without any str entry unconverted and made join() raise.
    buf.write("\n".join(str(x) for x in lines))
2056