1"""String formatting routines for __repr__.
2"""
3import contextlib
4import functools
5from datetime import datetime, timedelta
6from itertools import chain, zip_longest
7from typing import Hashable
8
9import numpy as np
10import pandas as pd
11from pandas.errors import OutOfBoundsDatetime
12
13from .duck_array_ops import array_equiv
14from .indexing import MemoryCachedArray
15from .options import OPTIONS, _get_boolean_with_default
16from .pycompat import dask_array_type, sparse_array_type
17from .utils import is_duck_array
18
19
def pretty_print(x, numchars: int):
    """Format ``x`` for a column that is ``numchars`` characters wide.

    ``x`` is converted to text by ``maybe_truncate`` (which appends an
    ellipsis when the text is too long); shorter text is padded on the right
    with spaces up to ``numchars``.
    """
    text = maybe_truncate(x, numchars)
    # ljust leaves text that is already wide enough untouched
    return text.ljust(numchars)
27
28
def maybe_truncate(obj, maxlen=500):
    """Return ``str(obj)``, shortened with a trailing ``"..."`` if too long.

    Parameters
    ----------
    obj : object
        Anything accepted by ``str``.
    maxlen : int, default: 500
        Maximum length of the returned string.

    Returns
    -------
    str
        ``str(obj)`` unchanged when it has at most ``maxlen`` characters,
        otherwise its first ``maxlen - 3`` characters followed by ``"..."``.
        When ``maxlen < 3`` only the three-character ellipsis is returned.
    """
    text = str(obj)
    if len(text) > maxlen:
        # Clamp the kept prefix at zero: a negative stop would be interpreted
        # relative to the end of the string and keep almost all of it.
        text = text[: max(maxlen - 3, 0)] + "..."
    return text
34
35
def wrap_indent(text, start="", length=None):
    """Prefix ``text`` with ``start`` and indent its continuation lines.

    Every line after the first is indented by ``length`` spaces; ``length``
    defaults to the number of characters in ``start``.
    """
    width = len(start) if length is None else length
    separator = "\n" + " " * width
    return start + separator.join(text.splitlines())
41
42
43def _get_indexer_at_least_n_items(shape, n_desired, from_end):
44    assert 0 < n_desired <= np.prod(shape)
45    cum_items = np.cumprod(shape[::-1])
46    n_steps = np.argmax(cum_items >= n_desired)
47    stop = int(np.ceil(float(n_desired) / np.r_[1, cum_items][n_steps]))
48    indexer = (
49        ((-1 if from_end else 0),) * (len(shape) - 1 - n_steps)
50        + ((slice(-stop, None) if from_end else slice(stop)),)
51        + (slice(None),) * n_steps
52    )
53    return indexer
54
55
def first_n_items(array, n_desired):
    """Return the first ``n_desired`` items of ``array`` in flattened order.

    Raises ``ValueError`` when fewer than one item is requested; an empty
    array yields an empty list.
    """
    # ``array.flat[:n_desired]`` is not an option because ``array`` might not
    # be a numpy.ndarray. Element access can also be very expensive (e.g. data
    # only available over DAP), so the items are fetched with a single
    # ``__getitem__`` call that uses only slices.
    if n_desired < 1:
        raise ValueError("must request at least one item")

    if array.size == 0:
        # work around for https://github.com/numpy/numpy/issues/5195
        return []

    subset = array
    if n_desired < array.size:
        subset = array[
            _get_indexer_at_least_n_items(array.shape, n_desired, from_end=False)
        ]
    return np.asarray(subset).flat[:n_desired]
73
74
def last_n_items(array, n_desired):
    """Return the last ``n_desired`` items of ``array`` in flattened order.

    Requesting zero items, or passing an empty array, yields an empty list.
    """
    # ``array.flat[-n_desired:]`` is not an option because ``array`` might not
    # be a numpy.ndarray. Element access can also be very expensive (e.g. data
    # only available over DAP), so the items are fetched with a single
    # ``__getitem__`` call that uses only slices.
    if n_desired == 0 or array.size == 0:
        return []

    subset = array
    if n_desired < array.size:
        subset = array[
            _get_indexer_at_least_n_items(array.shape, n_desired, from_end=True)
        ]
    return np.asarray(subset).flat[-n_desired:]
88
89
def last_item(array):
    """Return a one-element list holding the last item of ``array``.

    An empty array yields an empty list.
    """
    if array.size == 0:
        # work around for https://github.com/numpy/numpy/issues/5195
        return []

    # keep only the final position along every dimension
    trailing = array[(slice(-1, None),) * array.ndim]
    return np.asarray(trailing).ravel().tolist()
98
99
def format_timestamp(t):
    """Cast ``t`` to a ``pandas.Timestamp`` and return a compact string.

    The result is ``<date>`` when the time part is midnight and
    ``<date>T<time>`` otherwise. Values whose string form is not
    ``"<date> <time>"`` (e.g. NaT) are returned as that string unchanged.
    """
    try:
        text = str(pd.Timestamp(t))
    except OutOfBoundsDatetime:
        # Timestamp is only valid for 1678 to 2262
        text = str(t)

    pieces = text.split()
    if len(pieces) != 2:
        # NaT and others that don't split nicely
        return text

    date_part, time_part = pieces
    return date_part if time_part == "00:00:00" else f"{date_part}T{time_part}"
118
119
def format_timedelta(t, timedelta_format=None):
    """Cast ``t`` to a ``pandas.Timedelta`` and return a compact string.

    ``timedelta_format="date"`` keeps only the ``"<n> days"`` part and
    ``"time"`` keeps only the time-of-day part; any other value returns the
    full string. Values that do not contain ``" days "`` (e.g. NaT) are
    returned as their string form unchanged.
    """
    full_text = str(pd.Timedelta(t))
    pieces = full_text.split(" days ")
    if len(pieces) != 2:
        # NaT and others that don't split nicely
        return full_text

    days_part, time_part = pieces
    if timedelta_format == "date":
        return days_part + " days"
    if timedelta_format == "time":
        return time_part
    return full_text
135
136
def format_item(x, timedelta_format=None, quote_strings=True):
    """Return a succinct summary of a single object as a string.

    Datetimes and timedeltas go through ``format_timestamp`` and
    ``format_timedelta``; ``str``/``bytes`` are passed through ``repr`` when
    ``quote_strings`` is true and returned as-is otherwise; values with a
    floating dtype are formatted with four significant digits; everything
    else uses ``str``.
    """
    if isinstance(x, (np.datetime64, datetime)):
        return format_timestamp(x)

    if isinstance(x, (np.timedelta64, timedelta)):
        return format_timedelta(x, timedelta_format=timedelta_format)

    if isinstance(x, (str, bytes)):
        if quote_strings:
            return repr(x)
        return x

    is_floating = hasattr(x, "dtype") and np.issubdtype(x.dtype, np.floating)
    return f"{x:.4}" if is_floating else str(x)
149
150
def format_items(x):
    """Return succinct string summaries of all items in a sequence.

    For timedelta data the non-null values are inspected once so that all
    items share one format: time only when no value has a day component,
    days only when no value has a sub-day component.
    """
    values = np.asarray(x)
    timedelta_format = "datetime"

    if np.issubdtype(values.dtype, np.timedelta64):
        values = np.asarray(values, dtype="timedelta64[ns]")
        valid = values[~pd.isnull(values)]
        # the same values truncated to whole days
        whole_days = valid.astype("timedelta64[D]").astype("timedelta64[ns]")
        has_time_part = valid != whole_days
        has_day_part = whole_days != np.timedelta64(0, "ns")
        if not has_day_part.any():
            timedelta_format = "time"
        elif not has_time_part.any():
            timedelta_format = "date"

    return [format_item(item, timedelta_format) for item in values]
167
168
def format_array_flat(array, max_width: int):
    """Return a formatted string for as many items in the flattened version of
    array that will fit within max_width characters.

    Items are taken alternately from the front and the back of the array, so
    that when not everything fits the string shows leading and trailing
    values separated by ``" ... "``.
    """
    # every item will take up at least two characters, but we always want to
    # print at least first and last items
    max_possibly_relevant = min(
        max(array.size, 1), max(int(np.ceil(max_width / 2.0)), 2)
    )
    # the front receives the extra item when max_possibly_relevant is odd
    relevant_front_items = format_items(
        first_n_items(array, (max_possibly_relevant + 1) // 2)
    )
    relevant_back_items = format_items(last_n_items(array, max_possibly_relevant // 2))
    # interleave relevant front and back items:
    #     [a, b, c] and [y, z] -> [a, z, b, y, c]
    relevant_items = sum(
        zip_longest(relevant_front_items, reversed(relevant_back_items)), ()
    )[:max_possibly_relevant]

    # cum_len[i] is the width of the first i + 1 interleaved items when they
    # are joined by single spaces
    cum_len = np.cumsum([len(s) + 1 for s in relevant_items]) - 1
    if (array.size > 2) and (
        (max_possibly_relevant < array.size) or (cum_len > max_width).any()
    ):
        # not every item can be shown: separate front and back by an ellipsis
        padding = " ... "
        # position of the first item at which the text plus padding exceeds
        # max_width, but never fewer than two items
        max_len = max(int(np.argmax(cum_len + len(padding) - 1 > max_width)), 2)  # type: ignore[type-var]
        count = min(array.size, max_len)
    else:
        count = array.size
        padding = "" if (count <= 1) else " "

    num_front = (count + 1) // 2
    num_back = count - num_front
    # note that num_back is 0 <--> array.size is 0 or 1
    #                         <--> relevant_back_items is []
    pprint_str = "".join(
        [
            " ".join(relevant_front_items[:num_front]),
            padding,
            " ".join(relevant_back_items[-num_back:]),
        ]
    )

    # As a final check, if it's still too long even with the limit in values,
    # replace the end with an ellipsis
    # NB: this still returns a full 3-character ellipsis when max_width < 3
    if len(pprint_str) > max_width:
        pprint_str = pprint_str[: max(max_width - 3, 0)] + "..."

    return pprint_str
218
219
220_KNOWN_TYPE_REPRS = {np.ndarray: "np.ndarray"}
221with contextlib.suppress(ImportError):
222    import sparse
223
224    _KNOWN_TYPE_REPRS[sparse.COO] = "sparse.COO"
225
226
def inline_dask_repr(array):
    """One-line summary of a dask array for use inside an xarray repr.

    Similar to the dask array's own ``__repr__``, but without the redundant
    information that is already printed by the repr of the xarray wrapper.
    """
    assert isinstance(array, dask_array_type), array

    # length of the first chunk along every dimension
    chunksize = tuple(dim_chunks[0] for dim_chunks in array.chunks)

    meta_string = ""
    if hasattr(array, "_meta"):
        meta_type = type(array._meta)
        # prefer the registered short name, fall back to the class name
        meta_repr = _KNOWN_TYPE_REPRS.get(meta_type, meta_type.__name__)
        meta_string = f", meta={meta_repr}"

    return f"dask.array<chunksize={chunksize}{meta_string}>"
247
248
def inline_sparse_repr(array):
    """Similar to sparse.COO.__repr__, but without the redundant shape/dtype.

    The summary contains the type name, ``nnz`` and ``fill_value`` of
    ``array``.
    """
    assert isinstance(array, sparse_array_type), array
    type_name = type(array).__name__
    return f"<{type_name}: nnz={array.nnz:d}, fill_value={array.fill_value!s}>"
255
256
def inline_variable_array_repr(var, max_width):
    """Build a one-line summary of a variable's data.

    In-memory data is rendered with ``format_array_flat``. Otherwise the
    wrapped array's own ``_repr_inline_`` is used when present, then the dask
    and sparse summaries, then a truncated single-line ``repr`` for objects
    implementing ``__array_function__``. Anything else is shown as ``"..."``.
    """
    if var._in_memory:
        return format_array_flat(var, max_width)

    data = var._data
    if hasattr(data, "_repr_inline_"):
        return data._repr_inline_(max_width)
    if isinstance(data, dask_array_type):
        return inline_dask_repr(var.data)
    if isinstance(data, sparse_array_type):
        return inline_sparse_repr(var.data)
    if hasattr(data, "__array_function__"):
        one_line = repr(data).replace("\n", " ")
        return maybe_truncate(one_line, max_width)

    # internal xarray array type
    return "..."
272
273
def summarize_variable(
    name: Hashable, var, col_width: int, marker: str = " ", max_width: int = None
):
    """Summarize a variable in one line, e.g., for the Dataset.__repr__.

    Parameters
    ----------
    name : Hashable
        Name shown in the first column.
    var
        Object providing ``dims`` and ``dtype``; it is also handed to
        ``inline_variable_array_repr`` to render the values.
    col_width : int
        Width of the first (marker and name) column.
    marker : str, default: " "
        Text placed in front of the name.
    max_width : int, optional
        Total width of the line. Defaults to ``OPTIONS["display_width"]``.

    Returns
    -------
    str
        ``"<marker> <name> (<dims>) <dtype> <values>"``, where the dims part
        is omitted when ``var.dims`` is empty.

    Raises
    ------
    TypeError
        If ``max_width`` is not given and the ``display_width`` option is not
        an int.
    """
    if max_width is None:
        max_width_options = OPTIONS["display_width"]
        if not isinstance(max_width_options, int):
            # Report the offending option value: ``max_width`` itself is
            # always None in this branch.
            raise TypeError(
                f"`max_width` value of `{max_width_options}` is not a valid int"
            )
        max_width = max_width_options

    first_col = pretty_print(f"  {marker} {name} ", col_width)
    if var.dims:
        dims_str = "({}) ".format(", ".join(map(str, var.dims)))
    else:
        dims_str = ""
    front_str = f"{first_col}{dims_str}{var.dtype} "

    # whatever is left of the line is available for the values
    values_width = max_width - len(front_str)
    values_str = inline_variable_array_repr(var, values_width)

    return front_str + values_str
295
296
def _summarize_coord_multiindex(coord, col_width, marker):
    """Summary line for a MultiIndex coordinate: name column, dim, label."""
    name_column = pretty_print(f"  {marker} {coord.name} ", col_width)
    return f"{name_column}({coord.dims[0]!s}) MultiIndex"
300
301
def _summarize_coord_levels(coord, col_width, marker="-"):
    """Summarize every level of a MultiIndex coordinate, one line per level.

    For coordinates longer than 100 entries (and longer than ``col_width``)
    only the first and last ``col_width`` entries are used.
    """
    n_entries = len(coord)
    if n_entries > 100 and col_width < n_entries:
        positions = [*range(col_width), *range(-col_width, 0)]
        subset = coord[positions]
    else:
        subset = coord

    level_lines = (
        summarize_variable(
            level_name,
            subset.get_level_variable(level_name),
            col_width,
            marker=marker,
        )
        for level_name in subset.level_names
    )
    return "\n".join(level_lines)
316
317
def summarize_datavar(name, var, col_width):
    """One-line summary of a data variable, built from ``var.variable``."""
    variable = var.variable
    return summarize_variable(name, variable, col_width)
320
321
def summarize_coord(name: Hashable, var, col_width: int):
    """Summarize a coordinate for a repr.

    Coordinates whose name is one of their own dims are marked with ``*``.
    When such a coordinate has level names, a MultiIndex header line followed
    by one line per level is returned instead of a single line.
    """
    if name not in var.dims:
        return summarize_variable(name, var.variable, col_width, " ")

    index_var = var.variable.to_index_variable()
    if index_var.level_names is None:
        return summarize_variable(name, var.variable, col_width, "*")

    header = _summarize_coord_multiindex(index_var, col_width, "*")
    levels = _summarize_coord_levels(index_var, col_width)
    return "\n".join([header, levels])
335
336
def summarize_attr(key, value, col_width=None):
    """Summary for __repr__ - use ``X.attrs[key]`` for full value."""
    # Indented "key:" label, right-padded only when a column width is given
    label = f"    {key}:"
    if col_width is not None:
        label = pretty_print(label, col_width)

    # Escape tabs and newlines so the value stays on one line of known width
    text = str(value)
    for raw, escaped in (("\t", "\\t"), ("\n", "\\n")):
        text = text.replace(raw, escaped)

    # Finally, truncate to the configured display width
    return maybe_truncate(f"{label} {text}", OPTIONS["display_width"])
347
348
# Placeholder line that ``_mapping_repr`` emits for an empty mapping.
EMPTY_REPR = "    *empty*"
350
351
def _get_col_items(mapping):
    """Collect everything that is printed in the name column.

    This is every key of ``mapping`` plus, for index variables that have
    level names, the names of those MultiIndex levels.
    """
    from .variable import IndexVariable

    items = []
    for key, value in mapping.items():
        items.append(key)
        # values may wrap their variable or be variables themselves
        variable = getattr(value, "variable", value)
        if not isinstance(variable, IndexVariable):
            continue
        level_names = variable.to_index_variable().level_names
        if level_names is not None:
            items.extend(level_names)
    return items
367
368
369def _calculate_col_width(col_items):
370    max_name_length = max(len(str(s)) for s in col_items) if col_items else 0
371    col_width = max(max_name_length, 7) + 6
372    return col_width
373
374
def _mapping_repr(
    mapping, title, summarizer, expand_option_name, col_width=None, max_rows=None
):
    """Format one titled section of a repr from the entries of ``mapping``.

    Empty mappings show ``EMPTY_REPR``. When the option named by
    ``expand_option_name`` is false only the title and the number of entries
    are shown. When there are more than ``max_rows`` entries, only the first
    and last ones are summarized with an ellipsis row in between.
    """
    if col_width is None:
        col_width = _calculate_col_width(mapping)

    header = f"{title}:"
    if not mapping:
        return "\n".join([header, EMPTY_REPR])

    n_items = len(mapping)
    if not _get_boolean_with_default(expand_option_name, default=True):
        # collapsed section: title and item count only
        return f"{header} ({n_items})"

    if max_rows is None or n_items <= max_rows:
        rows = [summarizer(k, v, col_width) for k, v in mapping.items()]
        return "\n".join([header, *rows])

    # too many entries: the front gets the extra row when max_rows is odd
    keys = list(mapping.keys())
    n_front = max_rows // 2 + max_rows % 2
    rows = [summarizer(k, mapping[k], col_width) for k in keys[:n_front]]
    if max_rows > 1:
        n_back = max_rows // 2
        rows.append(pretty_print("    ...", col_width) + " ...")
        rows.extend(summarizer(k, mapping[k], col_width) for k in keys[-n_back:])
    return "\n".join([f"{header} ({max_rows}/{n_items})", *rows])
401
402
# ``_mapping_repr`` pre-configured for the "Data variables" section: entries
# are summarized with ``summarize_datavar`` and "display_expand_data_vars" is
# passed as the expand option name.
data_vars_repr = functools.partial(
    _mapping_repr,
    title="Data variables",
    summarizer=summarize_datavar,
    expand_option_name="display_expand_data_vars",
)


# ``_mapping_repr`` pre-configured for the "Attributes" section: entries are
# summarized with ``summarize_attr`` and "display_expand_attrs" is passed as
# the expand option name.
attrs_repr = functools.partial(
    _mapping_repr,
    title="Attributes",
    summarizer=summarize_attr,
    expand_option_name="display_expand_attrs",
)
417
418
def coords_repr(coords, col_width=None, max_rows=None):
    """Format the "Coordinates" section of a repr.

    When ``col_width`` is not given it is derived from the coordinate names,
    including MultiIndex level names.
    """
    width = col_width
    if width is None:
        width = _calculate_col_width(_get_col_items(coords))
    return _mapping_repr(
        coords,
        "Coordinates",
        summarize_coord,
        "display_expand_coords",
        col_width=width,
        max_rows=max_rows,
    )
430
431
def indexes_repr(indexes):
    """Format a mapping of indexes as ``"<name>: <repr>"`` entries.

    Continuation lines of each repr are indented to line up after the name.
    """
    return "\n".join(
        wrap_indent(repr(index), f"{name}: ") for name, index in indexes.items()
    )
437
438
def dim_summary(obj):
    """Return ``obj.sizes`` as a comma-separated ``"name: size"`` string."""
    return ", ".join(f"{name}: {size}" for name, size in obj.sizes.items())
442
443
def unindexed_dims_repr(dims, coords):
    """List the dimensions that are not in ``coords``.

    Returns ``None`` when every dimension is in ``coords``.
    """
    names = [f"{dim}" for dim in dims if dim not in coords]
    if not names:
        return None
    return "Dimensions without coordinates: " + ", ".join(names)
451
452
@contextlib.contextmanager
def set_numpy_options(*args, **kwargs):
    """Temporarily apply numpy print options inside a ``with`` block.

    The arguments are forwarded to numpy's print-option machinery and the
    previous options are restored when the block exits, even on error.
    """
    with np.printoptions(*args, **kwargs):
        yield
461
462
def limit_lines(string: str, *, limit: int):
    """Replace the middle lines of ``string`` by an ellipsis if it is too long.

    Parameters
    ----------
    string : str
        Text to shorten.
    limit : int
        Maximum number of lines of ``string`` to keep. Non-positive values
        keep no lines at all.

    Returns
    -------
    str
        ``string`` unchanged when it has at most ``limit`` lines. Otherwise
        the first ``limit // 2`` lines, a ``"..."`` line, and the remaining
        ``limit - limit // 2`` lines taken from the end.
    """
    lines = string.splitlines()
    if len(lines) <= limit:
        return string

    budget = max(limit, 0)
    n_head = budget // 2
    n_tail = budget - n_head
    # Slice the tail from an explicit start position: ``lines[-0:]`` would be
    # the whole list rather than an empty one.
    tail = lines[len(lines) - n_tail :]
    return "\n".join([*lines[:n_head], "...", *tail])
472
473
def short_numpy_repr(array):
    """Return ``repr`` of ``array`` as a numpy array under compact options.

    The number of edge items shown per axis shrinks as the number of
    dimensions grows (3 below three dimensions, 2 for three, 1 above).
    """
    array = np.asarray(array)

    if array.ndim > 3:
        edgeitems = 1
    elif array.ndim == 3:
        edgeitems = 2
    else:
        edgeitems = 3

    # default to lower precision so a full (abbreviated) line can fit on
    # one line with the default display_width
    with set_numpy_options(
        precision=6,
        linewidth=OPTIONS["display_width"],
        threshold=200,
        edgeitems=edgeitems,
    ):
        return repr(array)
489
490
def short_data_repr(array):
    """Format "data" for DataArray and Variable.

    Plain numpy arrays use ``short_numpy_repr``. For other objects the
    wrapped ``_data`` decides: duck arrays show their own repr limited to 40
    lines; in-memory or small (fewer than 1e5 items) data is rendered with
    ``short_numpy_repr``; anything else is summarized by size and dtype only.
    """
    # Check for a plain ndarray first: it has no ``_data`` attribute, so
    # looking that up beforehand would raise AttributeError.
    if isinstance(array, np.ndarray):
        return short_numpy_repr(array)

    internal_data = getattr(array, "variable", array)._data
    if is_duck_array(internal_data):
        return limit_lines(repr(array.data), limit=40)
    if array._in_memory or array.size < 1e5:
        return short_numpy_repr(array)

    # internal xarray array type
    return f"[{array.size} values with dtype={array.dtype}]"
503
504
def array_repr(arr):
    """Build the multi-line repr used for DataArray, Variable and IndexVariable.

    The result has a header line with type, optional name and dimensions,
    then the data, then (when present) coordinates, dimensions without
    coordinates, and attributes.
    """
    from .variable import Variable

    name = getattr(arr, "name", None)
    name_str = f"{name!r} " if name is not None else ""

    show_full_data = (
        isinstance(arr, Variable)
        or _get_boolean_with_default("display_expand_data", default=True)
        or isinstance(arr.variable._data, MemoryCachedArray)
    )
    if show_full_data:
        data_repr = short_data_repr(arr)
    else:
        data_repr = inline_variable_array_repr(arr.variable, OPTIONS["display_width"])

    header = f"<xarray.{type(arr).__name__} {name_str}({dim_summary(arr)})>"
    summary = [header, data_repr]

    if hasattr(arr, "coords"):
        if arr.coords:
            summary.append(repr(arr.coords))

        unindexed = unindexed_dims_repr(arr.dims, arr.coords)
        if unindexed:
            summary.append(unindexed)

    if arr.attrs:
        summary.append(attrs_repr(arr.attrs))

    return "\n".join(summary)
540
541
def dataset_repr(ds):
    """Build the multi-line repr of a dataset-like object.

    Sections appear in this order: header, dimensions, coordinates (if any),
    dimensions without coordinates (if any), data variables, and attributes
    (if any).
    """
    col_width = _calculate_col_width(_get_col_items(ds.variables))
    max_rows = OPTIONS["display_max_rows"]

    summary = [
        f"<xarray.{type(ds).__name__}>",
        f"{pretty_print('Dimensions:', col_width)}({dim_summary(ds)})",
    ]

    if ds.coords:
        summary.append(coords_repr(ds.coords, col_width=col_width, max_rows=max_rows))

    unindexed = unindexed_dims_repr(ds.dims, ds.coords)
    if unindexed:
        summary.append(unindexed)

    summary.append(data_vars_repr(ds.data_vars, col_width=col_width, max_rows=max_rows))

    if ds.attrs:
        summary.append(attrs_repr(ds.attrs, max_rows=max_rows))

    return "\n".join(summary)
564
565
def diff_dim_summary(a, b):
    """Describe differing dimensions of ``a`` and ``b``.

    Returns an empty string when ``a.dims`` and ``b.dims`` do not differ.
    """
    if a.dims != b.dims:
        return f"Differing dimensions:\n    ({dim_summary(a)}) != ({dim_summary(b)})"
    return ""
573
574
575def _diff_mapping_repr(a_mapping, b_mapping, compat, title, summarizer, col_width=None):
576    def extra_items_repr(extra_keys, mapping, ab_side):
577        extra_repr = [summarizer(k, mapping[k], col_width) for k in extra_keys]
578        if extra_repr:
579            header = f"{title} only on the {ab_side} object:"
580            return [header] + extra_repr
581        else:
582            return []
583
584    a_keys = set(a_mapping)
585    b_keys = set(b_mapping)
586
587    summary = []
588
589    diff_items = []
590
591    for k in a_keys & b_keys:
592        try:
593            # compare xarray variable
594            if not callable(compat):
595                compatible = getattr(a_mapping[k], compat)(b_mapping[k])
596            else:
597                compatible = compat(a_mapping[k], b_mapping[k])
598            is_variable = True
599        except AttributeError:
600            # compare attribute value
601            if is_duck_array(a_mapping[k]) or is_duck_array(b_mapping[k]):
602                compatible = array_equiv(a_mapping[k], b_mapping[k])
603            else:
604                compatible = a_mapping[k] == b_mapping[k]
605
606            is_variable = False
607
608        if not compatible:
609            temp = [
610                summarizer(k, vars[k], col_width) for vars in (a_mapping, b_mapping)
611            ]
612
613            if compat == "identical" and is_variable:
614                attrs_summary = []
615
616                for m in (a_mapping, b_mapping):
617                    attr_s = "\n".join(
618                        summarize_attr(ak, av) for ak, av in m[k].attrs.items()
619                    )
620                    attrs_summary.append(attr_s)
621
622                temp = [
623                    "\n".join([var_s, attr_s]) if attr_s else var_s
624                    for var_s, attr_s in zip(temp, attrs_summary)
625                ]
626
627            diff_items += [ab_side + s[1:] for ab_side, s in zip(("L", "R"), temp)]
628
629    if diff_items:
630        summary += [f"Differing {title.lower()}:"] + diff_items
631
632    summary += extra_items_repr(a_keys - b_keys, a_mapping, "left")
633    summary += extra_items_repr(b_keys - a_keys, b_mapping, "right")
634
635    return "\n".join(summary)
636
637
# ``_diff_mapping_repr`` pre-configured for coordinates: titled "Coordinates",
# entries summarized with ``summarize_coord``.
diff_coords_repr = functools.partial(
    _diff_mapping_repr, title="Coordinates", summarizer=summarize_coord
)


# ``_diff_mapping_repr`` pre-configured for data variables: titled
# "Data variables", entries summarized with ``summarize_datavar``.
diff_data_vars_repr = functools.partial(
    _diff_mapping_repr, title="Data variables", summarizer=summarize_datavar
)


# ``_diff_mapping_repr`` pre-configured for attributes: titled "Attributes",
# entries summarized with ``summarize_attr``.
diff_attrs_repr = functools.partial(
    _diff_mapping_repr, title="Attributes", summarizer=summarize_attr
)
651
652
653def _compat_to_str(compat):
654    if callable(compat):
655        compat = compat.__name__
656
657    if compat == "equals":
658        return "equal"
659    elif compat == "allclose":
660        return "close"
661    else:
662        return compat
663
664
def diff_array_repr(a, b, compat):
    """Describe the differences between two array-like objects.

    Used for DataArray, Variable and IndexVariable. The message covers
    dimensions, values, coordinates (when ``a`` has them) and, for
    ``compat == "identical"``, attributes.
    """
    summary = [
        f"Left and right {type(a).__name__} objects are not {_compat_to_str(compat)}",
        diff_dim_summary(a, b),
    ]

    # a callable compat doubles as the value comparison
    equiv = compat if callable(compat) else array_equiv

    if not equiv(a.data, b.data):
        data_reprs = [wrap_indent(short_numpy_repr(obj), start="    ") for obj in (a, b)]
        summary.append("Differing values:")
        summary.extend(
            side + "\n" + data_repr for side, data_repr in zip(("L", "R"), data_reprs)
        )

    if hasattr(a, "coords"):
        col_width = _calculate_col_width(set(a.coords) | set(b.coords))
        summary.append(
            diff_coords_repr(a.coords, b.coords, compat, col_width=col_width)
        )

    if compat == "identical":
        summary.append(diff_attrs_repr(a.attrs, b.attrs, compat))

    return "\n".join(summary)
697
698
def diff_dataset_repr(a, b, compat):
    """Describe the differences between two dataset-like objects.

    The message covers dimensions, coordinates, data variables and, for
    ``compat == "identical"``, attributes.
    """
    header = (
        f"Left and right {type(a).__name__} objects are not {_compat_to_str(compat)}"
    )

    # one shared name-column width for the names of both objects
    all_names = set(_get_col_items(a.variables) + _get_col_items(b.variables))
    col_width = _calculate_col_width(all_names)

    summary = [
        header,
        diff_dim_summary(a, b),
        diff_coords_repr(a.coords, b.coords, compat, col_width=col_width),
        diff_data_vars_repr(a.data_vars, b.data_vars, compat, col_width=col_width),
    ]

    if compat == "identical":
        summary.append(diff_attrs_repr(a.attrs, b.attrs, compat))

    return "\n".join(summary)
720