1import warnings
2from collections import defaultdict
3
4import numpy as np
5import pandas as pd
6
7from .coding import strings, times, variables
8from .coding.variables import SerializationWarning, pop_to
9from .core import duck_array_ops, indexing
10from .core.common import contains_cftime_datetimes
11from .core.pycompat import is_duck_dask_array
12from .core.variable import IndexVariable, Variable, as_variable
13
# Attribute names that, per the CF conventions, name *other variables* in the
# dataset (e.g. "bounds" points at a cell-boundary variable).  During encoding
# these are moved between attrs and encoding (see encode_cf_variable), and with
# decode_coords="all" their referents are promoted to coordinates (see
# decode_cf_variables).
CF_RELATED_DATA = (
    "bounds",
    "grid_mapping",
    "climatology",
    "geometry",
    "node_coordinates",
    "node_count",
    "part_node_count",
    "interior_ring",
    "cell_measures",
    "formula_terms",
)
# Subset of CF_RELATED_DATA whose attribute values are "role: name" pairs
# rather than a plain space-separated list of variable names, and therefore
# need extra parsing in decode_cf_variables.
CF_RELATED_DATA_NEEDS_PARSING = (
    "cell_measures",
    "formula_terms",
)
30
31
class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin):
    """Lazily present a non-native-endian array with native endianness.

    netCDF3 files store all values big endian; converting on access lets the
    data be used with Cython-based code (such as bottleneck and pandas) that
    requires native byte order.

    >>> x = np.arange(5, dtype=">i2")

    >>> x.dtype
    dtype('>i2')

    >>> NativeEndiannessArray(x).dtype
    dtype('int16')

    >>> indexer = indexing.BasicIndexer((slice(None),))
    >>> NativeEndiannessArray(x)[indexer].dtype
    dtype('int16')
    """

    __slots__ = ("array",)

    def __init__(self, array):
        # wrap so explicit-indexing adapters can handle the key types
        self.array = indexing.as_indexable(array)

    @property
    def dtype(self):
        # same kind and item size as the wrapped dtype, but native byte order
        wrapped = self.array.dtype
        return np.dtype(wrapped.kind + str(wrapped.itemsize))

    def __getitem__(self, key):
        # conversion happens here, on access, rather than eagerly
        values = self.array[key]
        return np.asarray(values, dtype=self.dtype)
64
class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin):
    """Lazily present an integer-typed array as booleans.

    netCDF has no boolean type, so boolean data round-trips through integer
    variables; this wrapper converts back to bool on access.

    >>> x = np.array([1, 0, 1, 1, 0], dtype="i1")

    >>> x.dtype
    dtype('int8')

    >>> BoolTypeArray(x).dtype
    dtype('bool')

    >>> indexer = indexing.BasicIndexer((slice(None),))
    >>> BoolTypeArray(x)[indexer].dtype
    dtype('bool')
    """

    __slots__ = ("array",)

    def __init__(self, array):
        # wrap so explicit-indexing adapters can handle the key types
        self.array = indexing.as_indexable(array)

    @property
    def dtype(self):
        # the advertised dtype is always boolean, whatever is stored
        return np.dtype("bool")

    def __getitem__(self, key):
        # conversion happens here, on access, rather than eagerly
        values = self.array[key]
        return np.asarray(values, dtype=self.dtype)
96
97def _var_as_tuple(var):
98    return var.dims, var.data, var.attrs.copy(), var.encoding.copy()
99
100
def maybe_encode_nonstring_dtype(var, name=None):
    """Cast ``var`` to the (non-string) dtype requested in its encoding.

    The requested dtype is popped from ``encoding["dtype"]``; "S1" and
    ``str`` targets are handled elsewhere and left untouched.  Float data
    cast to an integer dtype is rounded first, with a warning when no fill
    value is available to represent NaNs.
    """
    if "dtype" not in var.encoding or var.encoding["dtype"] in ("S1", str):
        return var

    dims, data, attrs, encoding = _var_as_tuple(var)
    dtype = np.dtype(encoding.pop("dtype"))
    if dtype != var.dtype:
        if np.issubdtype(dtype, np.integer):
            if (
                np.issubdtype(var.dtype, np.floating)
                and "_FillValue" not in var.attrs
                and "missing_value" not in var.attrs
            ):
                warnings.warn(
                    f"saving variable {name} with floating "
                    "point data as an integer dtype without "
                    "any _FillValue to use for NaNs",
                    SerializationWarning,
                    stacklevel=10,
                )
            # round floats before the integer cast so values don't truncate
            data = duck_array_ops.around(data)[...]
        data = data.astype(dtype=dtype)
    return Variable(dims, data, attrs, encoding)
123
124
def maybe_default_fill_value(var):
    """Give floating-point variables a default NaN ``_FillValue``.

    Leaves ``var`` alone when a fill value is already declared in either
    attrs or encoding, or when the dtype is not floating point.
    """
    already_filled = "_FillValue" in var.attrs or "_FillValue" in var.encoding
    if not already_filled and np.issubdtype(var.dtype, np.floating):
        var.attrs["_FillValue"] = var.dtype.type(np.nan)
    return var
134
135
def maybe_encode_bools(var):
    """Encode boolean data as int8 with a ``dtype="bool"`` attribute.

    Skipped when the variable is not boolean or a dtype has already been
    declared in attrs or encoding.
    """
    already_tagged = "dtype" in var.encoding or "dtype" in var.attrs
    if var.dtype != bool or already_tagged:
        return var
    dims, data, attrs, encoding = _var_as_tuple(var)
    attrs["dtype"] = "bool"
    encoded = data.astype(dtype="i1", copy=True)
    return Variable(dims, encoded, attrs, encoding)
147
148
149def _infer_dtype(array, name=None):
150    """Given an object array with no missing values, infer its dtype from its
151    first element
152    """
153    if array.dtype.kind != "O":
154        raise TypeError("infer_type must be called on a dtype=object array")
155
156    if array.size == 0:
157        return np.dtype(float)
158
159    element = array[(0,) * array.ndim]
160    if isinstance(element, (bytes, str)):
161        return strings.create_vlen_dtype(type(element))
162
163    dtype = np.array(element).dtype
164    if dtype.kind != "O":
165        return dtype
166
167    raise ValueError(
168        "unable to infer dtype on variable {!r}; xarray "
169        "cannot serialize arbitrary Python objects".format(name)
170    )
171
172
def ensure_not_multiindex(var, name=None):
    """Raise if ``var`` is an IndexVariable backed by a pandas MultiIndex.

    MultiIndexes cannot be serialized to netCDF; callers must reset_index()
    first.
    """
    if not isinstance(var, IndexVariable):
        return
    if isinstance(var.to_index(), pd.MultiIndex):
        raise NotImplementedError(
            "variable {!r} is a MultiIndex, which cannot yet be "
            "serialized to netCDF files "
            "(https://github.com/pydata/xarray/issues/1077). Use "
            "reset_index() to convert MultiIndex levels into coordinate "
            "variables instead.".format(name)
        )
182
183
184def _copy_with_dtype(data, dtype):
185    """Create a copy of an array with the given dtype.
186
187    We use this instead of np.array() to ensure that custom object dtypes end
188    up on the resulting array.
189    """
190    result = np.empty(data.shape, dtype)
191    result[...] = data
192    return result
193
194
def ensure_dtype_not_object(var, name=None):
    """Return ``var`` converted to a concrete (non-object) dtype.

    A fixed-size dtype is inferred from the non-missing elements; missing
    values are replaced with a dtype-appropriate fill value (b""/"" for
    strings, NaN for numbers).  Dask-backed object arrays are computed
    (with a warning) because inference needs the values in memory.
    Non-object variables are returned unchanged.
    """
    # TODO: move this from conventions to backends? (it's not CF related)
    if var.dtype.kind == "O":
        dims, data, attrs, encoding = _var_as_tuple(var)

        if is_duck_dask_array(data):
            warnings.warn(
                "variable {} has data in the form of a dask array with "
                "dtype=object, which means it is being loaded into memory "
                "to determine a data type that can be safely stored on disk. "
                "To avoid this, coerce this variable to a fixed-size dtype "
                "with astype() before saving it.".format(name),
                SerializationWarning,
            )
            data = data.compute()

        missing = pd.isnull(data)
        if missing.any():
            # nb. this will fail for dask.array data
            non_missing_values = data[~missing]
            inferred_dtype = _infer_dtype(non_missing_values, name)

            # There is no safe bit-pattern for NA in typical binary string
            # formats, so we can't set a fill_value. Unfortunately, this means
            # we can't distinguish between missing values and empty strings.
            if strings.is_bytes_dtype(inferred_dtype):
                fill_value = b""
            elif strings.is_unicode_dtype(inferred_dtype):
                fill_value = ""
            else:
                # insist on using float for numeric values
                if not np.issubdtype(inferred_dtype, np.floating):
                    inferred_dtype = np.dtype(float)
                fill_value = inferred_dtype.type(np.nan)

            data = _copy_with_dtype(data, dtype=inferred_dtype)
            data[missing] = fill_value
        else:
            data = _copy_with_dtype(data, dtype=_infer_dtype(data, name))

        # an object dtype is only acceptable here when it carries metadata —
        # presumably the vlen-string dtypes from coding.strings (TODO confirm)
        assert data.dtype.kind != "O" or data.dtype.metadata
        var = Variable(dims, data, attrs, encoding)
    return var
238
239
def encode_cf_variable(var, needs_copy=True, name=None):
    """
    Converts a Variable into a Variable which follows some
    of the CF conventions:

        - Nans are masked using _FillValue (or the deprecated missing_value)
        - Rescaling via: scale_factor and add_offset
        - datetimes are converted to the CF 'units since time' format
        - dtype encodings are enforced.

    Parameters
    ----------
    var : Variable
        A variable holding un-encoded data.
    needs_copy : bool, optional
        Not used by this function; kept for call-site compatibility.
    name : str or None, optional
        Name of the variable, used only in warning and error messages.

    Returns
    -------
    out : Variable
        A variable which has been encoded as described above.
    """
    ensure_not_multiindex(var, name=name)

    # coders are applied in order; each returns a (possibly) new Variable
    for coder in [
        times.CFDatetimeCoder(),
        times.CFTimedeltaCoder(),
        variables.CFScaleOffsetCoder(),
        variables.CFMaskCoder(),
        variables.UnsignedIntegerCoder(),
    ]:
        var = coder.encode(var, name=name)

    # TODO(shoyer): convert all of these to use coders, too:
    var = maybe_encode_nonstring_dtype(var, name=name)
    var = maybe_default_fill_value(var)
    var = maybe_encode_bools(var)
    var = ensure_dtype_not_object(var, name=name)

    # move CF-related references stashed in encoding back into attrs —
    # undoing what decode_cf_variables did when decode_coords="all"
    for attr_name in CF_RELATED_DATA:
        pop_to(var.encoding, var.attrs, attr_name)
    return var
280
281
def decode_cf_variable(
    name,
    var,
    concat_characters=True,
    mask_and_scale=True,
    decode_times=True,
    decode_endianness=True,
    stack_char_dim=True,
    use_cftime=None,
    decode_timedelta=None,
):
    """
    Decodes a variable which may hold CF encoded information.

    This includes variables that have been masked and scaled, which
    hold CF style time variables (this is almost always the case if
    the dataset has been serialized) and which have strings encoded
    as character arrays.

    Parameters
    ----------
    name : str
        Name of the variable. Used for better error messages.
    var : Variable
        A variable holding potentially CF encoded information.
    concat_characters : bool
        Should character arrays be concatenated to strings, for
        example: ["h", "e", "l", "l", "o"] -> "hello"
    mask_and_scale : bool
        Lazily scale (using scale_factor and add_offset) and mask
        (using _FillValue). If the _Unsigned attribute is present
        treat integer arrays as unsigned.
    decode_times : bool
        Decode cf times ("hours since 2000-01-01") to np.datetime64.
    decode_endianness : bool
        Decode arrays from non-native to native endianness.
    stack_char_dim : bool
        Whether to stack characters into bytes along the last dimension of this
        array. Passed as an argument because we need to look at the full
        dataset to figure out if this is appropriate.
    use_cftime : bool, optional
        Only relevant if encoded dates come from a standard calendar
        (e.g. "gregorian", "proleptic_gregorian", "standard", or not
        specified).  If None (default), attempt to decode times to
        ``np.datetime64[ns]`` objects; if this is not possible, decode times to
        ``cftime.datetime`` objects. If True, always decode times to
        ``cftime.datetime`` objects, regardless of whether or not they can be
        represented using ``np.datetime64[ns]`` objects.  If False, always
        decode times to ``np.datetime64[ns]`` objects; if this is not possible
        raise an error.
    decode_timedelta : bool, optional
        If True, decode variables with recognized time units into timedelta
        objects. If False, leave them encoded as numbers. If None (default),
        use the same value as ``decode_times``.

    Returns
    -------
    out : Variable
        A variable holding the decoded equivalent of var.
    """
    var = as_variable(var)
    # remember the on-disk dtype so it can be restored when re-encoding
    original_dtype = var.dtype

    if decode_timedelta is None:
        # by default, decode timedeltas if and only if times are decoded
        decode_timedelta = decode_times

    if concat_characters:
        if stack_char_dim:
            var = strings.CharacterArrayCoder().decode(var, name=name)
        var = strings.EncodedStringCoder().decode(var)

    if mask_and_scale:
        # decoding runs in the reverse order of encoding in encode_cf_variable
        for coder in [
            variables.UnsignedIntegerCoder(),
            variables.CFMaskCoder(),
            variables.CFScaleOffsetCoder(),
        ]:
            var = coder.decode(var, name=name)

    if decode_timedelta:
        var = times.CFTimedeltaCoder().decode(var, name=name)
    if decode_times:
        var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name)

    dimensions, data, attributes, encoding = variables.unpack_for_decoding(var)
    # TODO(shoyer): convert everything below to use coders

    if decode_endianness and not data.dtype.isnative:
        # do this last, so it's only done if we didn't already unmask/scale
        data = NativeEndiannessArray(data)
        original_dtype = data.dtype

    encoding.setdefault("dtype", original_dtype)

    # "dtype" == "bool" is the marker written by maybe_encode_bools
    if "dtype" in attributes and attributes["dtype"] == "bool":
        del attributes["dtype"]
        data = BoolTypeArray(data)

    if not is_duck_dask_array(data):
        # wrap so indexing stays lazy for non-dask backends
        data = indexing.LazilyIndexedArray(data)

    return Variable(dimensions, data, attributes, encoding=encoding)
380
381
382def _update_bounds_attributes(variables):
383    """Adds time attributes to time bounds variables.
384
385    Variables handling time bounds ("Cell boundaries" in the CF
386    conventions) do not necessarily carry the necessary attributes to be
387    decoded. This copies the attributes from the time variable to the
388    associated boundaries.
389
390    See Also:
391
392    http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/
393         cf-conventions.html#cell-boundaries
394
395    https://github.com/pydata/xarray/issues/2565
396    """
397
398    # For all time variables with bounds
399    for v in variables.values():
400        attrs = v.attrs
401        has_date_units = "units" in attrs and "since" in attrs["units"]
402        if has_date_units and "bounds" in attrs:
403            if attrs["bounds"] in variables:
404                bounds_attrs = variables[attrs["bounds"]].attrs
405                bounds_attrs.setdefault("units", attrs["units"])
406                if "calendar" in attrs:
407                    bounds_attrs.setdefault("calendar", attrs["calendar"])
408
409
410def _update_bounds_encoding(variables):
411    """Adds time encoding to time bounds variables.
412
413    Variables handling time bounds ("Cell boundaries" in the CF
414    conventions) do not necessarily carry the necessary attributes to be
415    decoded. This copies the encoding from the time variable to the
416    associated bounds variable so that we write CF-compliant files.
417
418    See Also:
419
420    http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/
421         cf-conventions.html#cell-boundaries
422
423    https://github.com/pydata/xarray/issues/2565
424    """
425
426    # For all time variables with bounds
427    for v in variables.values():
428        attrs = v.attrs
429        encoding = v.encoding
430        has_date_units = "units" in encoding and "since" in encoding["units"]
431        is_datetime_type = np.issubdtype(
432            v.dtype, np.datetime64
433        ) or contains_cftime_datetimes(v)
434
435        if (
436            is_datetime_type
437            and not has_date_units
438            and "bounds" in attrs
439            and attrs["bounds"] in variables
440        ):
441            warnings.warn(
442                "Variable '{0}' has datetime type and a "
443                "bounds variable but {0}.encoding does not have "
444                "units specified. The units encodings for '{0}' "
445                "and '{1}' will be determined independently "
446                "and may not be equal, counter to CF-conventions. "
447                "If this is a concern, specify a units encoding for "
448                "'{0}' before writing to a file.".format(v.name, attrs["bounds"]),
449                UserWarning,
450            )
451
452        if has_date_units and "bounds" in attrs:
453            if attrs["bounds"] in variables:
454                bounds_encoding = variables[attrs["bounds"]].encoding
455                bounds_encoding.setdefault("units", encoding["units"])
456                if "calendar" in encoding:
457                    bounds_encoding.setdefault("calendar", encoding["calendar"])
458
459
def decode_cf_variables(
    variables,
    attributes,
    concat_characters=True,
    mask_and_scale=True,
    decode_times=True,
    decode_coords=True,
    drop_variables=None,
    use_cftime=None,
    decode_timedelta=None,
):
    """
    Decode several CF encoded variables.

    Besides decoding each variable (see decode_cf_variable), this resolves
    which variables should become coordinates: those named by "coordinates"
    attributes and, when ``decode_coords="all"``, those referenced by the
    other CF_RELATED_DATA attributes.

    Returns (decoded variables dict, attributes, set of coordinate names).

    See: decode_cf_variable
    """
    # map each dimension to the variables that use it, for stackable() below
    dimensions_used_by = defaultdict(list)
    for v in variables.values():
        for d in v.dims:
            dimensions_used_by[d].append(v)

    def stackable(dim):
        # figure out if a dimension can be concatenated over
        if dim in variables:
            return False
        for v in dimensions_used_by[dim]:
            if v.dtype.kind != "S" or dim != v.dims[-1]:
                return False
        return True

    coord_names = set()

    if isinstance(drop_variables, str):
        drop_variables = [drop_variables]
    elif drop_variables is None:
        drop_variables = []
    drop_variables = set(drop_variables)

    # Time bounds coordinates might miss the decoding attributes
    if decode_times:
        _update_bounds_attributes(variables)

    new_vars = {}
    for k, v in variables.items():
        if k in drop_variables:
            continue
        # only stack 1-char byte arrays, and only along their last dimension
        stack_char_dim = (
            concat_characters
            and v.dtype == "S1"
            and v.ndim > 0
            and stackable(v.dims[-1])
        )
        new_vars[k] = decode_cf_variable(
            k,
            v,
            concat_characters=concat_characters,
            mask_and_scale=mask_and_scale,
            decode_times=decode_times,
            stack_char_dim=stack_char_dim,
            use_cftime=use_cftime,
            decode_timedelta=decode_timedelta,
        )
        if decode_coords in [True, "coordinates", "all"]:
            var_attrs = new_vars[k].attrs
            if "coordinates" in var_attrs:
                # move "coordinates" to encoding and promote the referenced
                # variables, but only if all of them actually exist
                coord_str = var_attrs["coordinates"]
                var_coord_names = coord_str.split()
                if all(k in variables for k in var_coord_names):
                    new_vars[k].encoding["coordinates"] = coord_str
                    del var_attrs["coordinates"]
                    coord_names.update(var_coord_names)

        if decode_coords == "all":
            for attr_name in CF_RELATED_DATA:
                if attr_name in var_attrs:
                    attr_val = var_attrs[attr_name]
                    if attr_name not in CF_RELATED_DATA_NEEDS_PARSING:
                        var_names = attr_val.split()
                    else:
                        # these attributes hold "role: name" pairs; flatten to
                        # tokens and take every second one as a variable name
                        roles_and_names = [
                            role_or_name
                            for part in attr_val.split(":")
                            for role_or_name in part.split()
                        ]
                        if len(roles_and_names) % 2 == 1:
                            warnings.warn(
                                f"Attribute {attr_name:s} malformed", stacklevel=5
                            )
                        var_names = roles_and_names[1::2]
                    if all(var_name in variables for var_name in var_names):
                        new_vars[k].encoding[attr_name] = attr_val
                        coord_names.update(var_names)
                    else:
                        referenced_vars_not_in_variables = [
                            proj_name
                            for proj_name in var_names
                            if proj_name not in variables
                        ]
                        warnings.warn(
                            f"Variable(s) referenced in {attr_name:s} not in variables: {referenced_vars_not_in_variables!s}",
                            stacklevel=5,
                        )
                    # the attribute is consumed either way
                    del var_attrs[attr_name]

    # a dataset-level "coordinates" attribute also promotes variables
    if decode_coords and "coordinates" in attributes:
        attributes = dict(attributes)
        coord_names.update(attributes.pop("coordinates").split())

    return new_vars, attributes, coord_names
569
570
def decode_cf(
    obj,
    concat_characters=True,
    mask_and_scale=True,
    decode_times=True,
    decode_coords=True,
    drop_variables=None,
    use_cftime=None,
    decode_timedelta=None,
):
    """Decode the given Dataset or Datastore according to CF conventions into
    a new Dataset.

    Parameters
    ----------
    obj : Dataset or DataStore
        Object to decode.
    concat_characters : bool, optional
        Should character arrays be concatenated to strings, for
        example: ["h", "e", "l", "l", "o"] -> "hello"
    mask_and_scale : bool, optional
        Lazily scale (using scale_factor and add_offset) and mask
        (using _FillValue).
    decode_times : bool, optional
        Decode cf times (e.g., integers since "hours since 2000-01-01") to
        np.datetime64.
    decode_coords : bool or {"coordinates", "all"}, optional
        Controls which variables are set as coordinate variables:

        - "coordinates" or True: Set variables referred to in the
          ``'coordinates'`` attribute of the datasets or individual variables
          as coordinate variables.
        - "all": Set variables referred to in  ``'grid_mapping'``, ``'bounds'`` and
          other attributes as coordinate variables.
    drop_variables : str or iterable, optional
        A variable or list of variables to exclude from being parsed from the
        dataset. This may be useful to drop variables with problems or
        inconsistent values.
    use_cftime : bool, optional
        Only relevant if encoded dates come from a standard calendar
        (e.g. "gregorian", "proleptic_gregorian", "standard", or not
        specified).  If None (default), attempt to decode times to
        ``np.datetime64[ns]`` objects; if this is not possible, decode times to
        ``cftime.datetime`` objects. If True, always decode times to
        ``cftime.datetime`` objects, regardless of whether or not they can be
        represented using ``np.datetime64[ns]`` objects.  If False, always
        decode times to ``np.datetime64[ns]`` objects; if this is not possible
        raise an error.
    decode_timedelta : bool, optional
        If True, decode variables and coordinates with time units in
        {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
        into timedelta objects. If False, leave them encoded as numbers.
        If None (default), assume the same value of decode_times.

    Returns
    -------
    decoded : Dataset
    """
    # imported here to avoid a circular import at module load time
    from .backends.common import AbstractDataStore
    from .core.dataset import Dataset

    if isinstance(obj, Dataset):
        # read variables, attrs, coords and encoding straight off the Dataset
        vars = obj._variables
        attrs = obj.attrs
        extra_coords = set(obj.coords)
        close = obj._close
        encoding = obj.encoding
    elif isinstance(obj, AbstractDataStore):
        # a store must be loaded first; it has no pre-existing coordinates
        vars, attrs = obj.load()
        extra_coords = set()
        close = obj.close
        encoding = obj.get_encoding()
    else:
        raise TypeError("can only decode Dataset or DataStore objects")

    vars, attrs, coord_names = decode_cf_variables(
        vars,
        attrs,
        concat_characters,
        mask_and_scale,
        decode_times,
        decode_coords,
        drop_variables=drop_variables,
        use_cftime=use_cftime,
        decode_timedelta=decode_timedelta,
    )
    ds = Dataset(vars, attrs=attrs)
    # only mark coordinate names that actually survived decoding/dropping
    ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars))
    ds.set_close(close)
    ds.encoding = encoding

    return ds
663
664
def cf_decoder(
    variables,
    attributes,
    concat_characters=True,
    mask_and_scale=True,
    decode_times=True,
):
    """
    Decode a set of CF encoded variables and attributes.

    This is a thin wrapper around decode_cf_variables that discards the
    inferred coordinate names.

    Parameters
    ----------
    variables : dict
        A dictionary mapping from variable name to xarray.Variable
    attributes : dict
        A dictionary mapping from attribute name to value
    concat_characters : bool
        Should character arrays be concatenated to strings, for
        example: ["h", "e", "l", "l", "o"] -> "hello"
    mask_and_scale : bool
        Lazily scale (using scale_factor and add_offset) and mask
        (using _FillValue).
    decode_times : bool
        Decode cf times ("hours since 2000-01-01") to np.datetime64.

    Returns
    -------
    decoded_variables : dict
        A dictionary mapping from variable name to xarray.Variable objects.
    decoded_attributes : dict
        A dictionary mapping from attribute name to values.

    See Also
    --------
    decode_cf_variable
    """
    decoded_variables, decoded_attributes, _ = decode_cf_variables(
        variables,
        attributes,
        concat_characters,
        mask_and_scale,
        decode_times,
    )
    return decoded_variables, decoded_attributes
705
706
def _encode_coordinates(variables, attributes, non_dim_coord_names):
    """Record non-dimension coordinates in "coordinates" attributes.

    Each coordinate is attached to the variables it describes; anything left
    over goes into a global "coordinates" attribute so xarray can round-trip
    it. Returns (shallow-copied variables, possibly-updated attributes).
    """
    # calculate global and variable specific coordinates
    non_dim_coord_names = set(non_dim_coord_names)

    # a name with a space can't live in a space-separated attribute value
    for name in list(non_dim_coord_names):
        if isinstance(name, str) and " " in name:
            warnings.warn(
                "coordinate {!r} has a space in its name, which means it "
                "cannot be marked as a coordinate on disk and will be "
                "saved as a data variable instead".format(name),
                SerializationWarning,
                stacklevel=6,
            )
            non_dim_coord_names.discard(name)

    global_coordinates = non_dim_coord_names.copy()
    variable_coordinates = defaultdict(set)
    not_technically_coordinates = set()
    for coord_name in non_dim_coord_names:
        target_dims = variables[coord_name].dims
        for k, v in variables.items():
            # attach the coordinate to every variable whose dims cover the
            # coordinate's dims (excluding coordinates and dim-named vars)
            if (
                k not in non_dim_coord_names
                and k not in v.dims
                and set(target_dims) <= set(v.dims)
            ):
                variable_coordinates[k].add(coord_name)

            # names already referenced via CF-related encoding entries (e.g.
            # "bounds") should not also be listed as coordinates.
            # NOTE(review): when the encoding value is a string this `in` is a
            # substring test, so e.g. coord "x" would match "x_bnds" — confirm
            # whether token-wise matching was intended.
            if any(
                attr_name in v.encoding and coord_name in v.encoding.get(attr_name)
                for attr_name in CF_RELATED_DATA
            ):
                not_technically_coordinates.add(coord_name)
                global_coordinates.discard(coord_name)

    variables = {k: v.copy(deep=False) for k, v in variables.items()}

    # keep track of variable names written to file under the "coordinates" attributes
    written_coords = set()
    for name, var in variables.items():
        encoding = var.encoding
        attrs = var.attrs
        if "coordinates" in attrs and "coordinates" in encoding:
            raise ValueError(
                f"'coordinates' found in both attrs and encoding for variable {name!r}."
            )

        # if coordinates set to None, don't write coordinates attribute
        if (
            "coordinates" in attrs
            and attrs.get("coordinates") is None
            or "coordinates" in encoding
            and encoding.get("coordinates") is None
        ):
            # make sure "coordinates" is removed from attrs/encoding
            attrs.pop("coordinates", None)
            encoding.pop("coordinates", None)
            continue

        # this will copy coordinates from encoding to attrs if "coordinates" in attrs
        # after the next line, "coordinates" is never in encoding
        # we get support for attrs["coordinates"] for free.
        coords_str = pop_to(encoding, attrs, "coordinates")
        if not coords_str and variable_coordinates[name]:
            coordinates_text = " ".join(
                str(coord_name)
                for coord_name in variable_coordinates[name]
                if coord_name not in not_technically_coordinates
            )
            if coordinates_text:
                attrs["coordinates"] = coordinates_text
        if "coordinates" in attrs:
            written_coords.update(attrs["coordinates"].split())

    # These coordinates are not associated with any particular variables, so we
    # save them under a global 'coordinates' attribute so xarray can roundtrip
    # the dataset faithfully. Because this serialization goes beyond CF
    # conventions, only do it if necessary.
    # Reference discussion:
    # http://mailman.cgd.ucar.edu/pipermail/cf-metadata/2014/007571.html
    global_coordinates.difference_update(written_coords)
    if global_coordinates:
        attributes = dict(attributes)
        if "coordinates" in attributes:
            warnings.warn(
                f"cannot serialize global coordinates {global_coordinates!r} because the global "
                f"attribute 'coordinates' already exists. This may prevent faithful roundtripping"
                f"of xarray datasets",
                SerializationWarning,
            )
        else:
            attributes["coordinates"] = " ".join(map(str, global_coordinates))

    return variables, attributes
801
802
def encode_dataset_coordinates(dataset):
    """Encode a dataset's coordinates into variable-level and global attrs.

    Where possible this follows CF conventions (per-variable "coordinates"
    attributes); leftovers land in a global "coordinates" attribute.

    Parameters
    ----------
    dataset : Dataset
        Object to encode.

    Returns
    -------
    variables : dict
    attrs : dict
    """
    # dimension coordinates are implied by the dims and need no attribute
    non_dim_coords = set(dataset.coords) - set(dataset.dims)
    return _encode_coordinates(
        dataset._variables, dataset.attrs, non_dim_coord_names=non_dim_coords
    )
823
824
def cf_encoder(variables, attributes):
    """
    Encode a dict of variables and attributes to conform to CF conventions
    as much as possible.

    This covers masking, scaling, character-array handling and CF-time
    encoding, and drops attributes that bounds variables merely inherit
    from their parent variable.

    Parameters
    ----------
    variables : dict
        A dictionary mapping from variable name to xarray.Variable
    attributes : dict
        A dictionary mapping from attribute name to value

    Returns
    -------
    encoded_variables : dict
        A dictionary mapping from variable name to xarray.Variable,
    encoded_attributes : dict
        A dictionary mapping from attribute name to value

    See Also
    --------
    decode_cf_variable, encode_cf_variable
    """

    # add encoding for time bounds variables if present.
    _update_bounds_encoding(variables)

    new_vars = {
        name: encode_cf_variable(v, name=name) for name, v in variables.items()
    }

    # Remove attrs from bounds variables (issue #2921)
    inheritable_attrs = (
        "units",
        "standard_name",
        "axis",
        "positive",
        "calendar",
        "long_name",
        "leap_month",
        "leap_year",
        "month_lengths",
    )
    for var in new_vars.values():
        bounds = var.attrs.get("bounds")
        if not bounds or bounds not in new_vars:
            continue
        # see http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries
        bounds_attrs = new_vars[bounds].attrs
        for attr in inheritable_attrs:
            if attr in bounds_attrs and attr in var.attrs:
                if bounds_attrs[attr] == var.attrs[attr]:
                    bounds_attrs.pop(attr)

    return new_vars, attributes
878