1"""Utility functions and classes for the STIX2 library."""
2
3import datetime as dt
4import enum
5import json
6import re
7
8import pytz
9import six
10
11import stix2
12
# Sentinel value for properties that should be set to the current time.
# We can't use the standard 'default' approach, since if there are multiple
# timestamps in a single object, each one would be computed separately and
# the timestamps would vary by a few microseconds.  object() yields a unique
# instance, so no ordinary property value can be mistaken for it.
NOW = object()

# Matches any string which begins with a lowercase ASCII letter.
# NOTE(review): the name suggests a STIX 2.1 naming-prefix check, but no use
# of this pattern is visible in this part of the file -- confirm with callers.
PREFIX_21_REGEX = re.compile(r'^[a-z].*')

# strptime() formats for timestamp strings, without and with a fractional
# seconds component.  The trailing "Z" is matched as a literal character.
_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
_TIMESTAMP_FORMAT_FRAC = "%Y-%m-%dT%H:%M:%S.%fZ"
22
23
class Precision(enum.Enum):
    """
    Timestamp format precisions.

    Selects how many fractional-second digits a timestamp carries when it is
    parsed by parse_into_datetime() and formatted by format_datetime().
    """
    # auto() wasn't introduced until Python 3.6.
    ANY = 1  # no fixed precision: keep whatever fractional digits exist
    SECOND = 2  # whole-second precision
    MILLISECOND = 3  # three fractional digits
32
33
class PrecisionConstraint(enum.Enum):
    """
    Timestamp precision constraints.  These affect how the Precision
    values are applied when formatting a timestamp.

    These constraints don't really make sense with the ANY precision, so they
    have no effect in that case.

    With EXACT, digits beyond the chosen precision are dropped.  With MIN,
    extra digits are kept, and the chosen precision is only a lower bound on
    how many digits are written.
    """
    EXACT = 1  # format must have exactly the given precision
    MIN = 2  # format must have at least the given precision
    #  no need for a MAX constraint yet
45
46
def _to_enum(value, enum_type, enum_default=None):
    """
    Coerce an API argument into a member of enum_type.

    Callers may pass an enum member, the (case-insensitive) name of a member
    as a string, or None to request the default.  Anything else is rejected,
    so the returned value is always a genuine enum_type member.

    :param value: The value to interpret: an instance of enum_type, a string
        naming one of its members, or None.
    :param enum_type: The enum class which strings are looked up in
    :param enum_default: The member to return when value is None.  Must be
        an instance of enum_type, or None.  Passing None means there is no
        default, so a None value is rejected.
    :return: An instance of enum_type
    :raises TypeError: If value is not an instance of enum_type, not a
        string, and not a None for which a default is available
    :raises KeyError: If value is a string which doesn't name a member of
        enum_type
    """
    assert enum_default is None or isinstance(enum_default, enum_type)

    # Already the right kind of enum: nothing to convert.
    if isinstance(value, enum_type):
        return value

    # None is only acceptable when the caller supplied a default.
    if value is None and enum_default is not None:
        return enum_default

    # Strings are looked up by member name; names are upper-case.
    if isinstance(value, six.string_types):
        return enum_type[value.upper()]

    raise TypeError("Not a valid {}: {}".format(
        enum_type.__name__, value,
    ))
79
80
class STIXdatetime(dt.datetime):
    """
    Bundle a datetime with some format-related metadata, so that JSON
    serialization has the info it needs to produce compliant timestamps.

    Construct it like datetime.datetime (positionally or by keyword), or pass
    an existing datetime as the first positional argument to copy its fields.
    Two extra keyword arguments are accepted and stored on the instance:

    :param precision: A Precision member or its name (case-insensitive).
        Defaults to Precision.ANY.
    :param precision_constraint: A PrecisionConstraint member or its name
        (case-insensitive).  Defaults to PrecisionConstraint.EXACT.
    """

    def __new__(cls, *args, **kwargs):
        # Strip our own keywords before delegating; datetime.__new__ would
        # reject them as unexpected arguments.
        precision = _to_enum(
            kwargs.pop("precision", Precision.ANY),
            Precision,
        )
        precision_constraint = _to_enum(
            kwargs.pop("precision_constraint", PrecisionConstraint.EXACT),
            PrecisionConstraint,
        )

        # Allow passing in a datetime object.  "args" may legitimately be
        # empty (construction purely by keyword, e.g. year=..., month=...,
        # day=...), so check it before indexing to avoid an IndexError.
        if args and isinstance(args[0], dt.datetime):
            dttm = args[0]
            args = (
                dttm.year, dttm.month, dttm.day, dttm.hour, dttm.minute,
                dttm.second, dttm.microsecond, dttm.tzinfo,
            )
        # self will be an instance of STIXdatetime, not dt.datetime
        self = dt.datetime.__new__(cls, *args, **kwargs)
        self.precision = precision
        self.precision_constraint = precision_constraint
        return self

    def __repr__(self):
        # The repr is the STIX timestamp string, wrapped in single quotes.
        return "'%s'" % format_datetime(self)
111
112
def deduplicate(stix_obj_list):
    """Reduce a list of STIX objects to one object per unique version.

    An object version is identified by its 'id' together with its
    'modified' value, falling back to 'created' when 'modified' is missing
    or empty.  Objects with neither value are identified by 'id' alone.

    Note: if "stix_obj_list" contains several objects of the same version,
    the last one found in the list is the one that is returned.

    Args:
        stix_obj_list (list): list of STIX objects (dicts)

    Returns:
        A list with a unique set of the passed list of STIX objects.

    """
    def _version_key(stix_obj):
        # Key on (id, version) when a version exists, otherwise on id alone.
        version = stix_obj.get("modified") or stix_obj.get("created")
        if version is None:
            return stix_obj["id"]
        return (stix_obj["id"], version)

    # Later objects overwrite earlier ones that share the same key.
    latest_by_version = {
        _version_key(item): item for item in stix_obj_list
    }

    return list(latest_by_version.values())
143
144
def get_timestamp():
    """Return the current date and time as a UTC-zoned STIXdatetime."""
    current_utc = STIXdatetime.now(tz=pytz.UTC)
    return current_utc
148
149
def format_datetime(dttm):
    """Convert a datetime object into a valid STIX timestamp string.

    1. Treat a timezone-naive value as UTC; convert an aware value to UTC
    2. Format date and time in ISO format, to the second
    3. Work out the fractional seconds digits (possibly none) from the
       value's "precision" and "precision_constraint" attributes; values
       lacking those attributes are treated as ANY / EXACT
    4. Append the fraction, if there is one, and a final "Z"

    """
    tzinfo = dttm.tzinfo
    if tzinfo is not None and tzinfo.utcoffset(dttm) is not None:
        utc_dttm = dttm.astimezone(pytz.utc)
    else:
        # dttm is timezone-naive; assume UTC
        utc_dttm = pytz.utc.localize(dttm)

    base = utc_dttm.strftime('%Y-%m-%dT%H:%M:%S')

    precision = getattr(dttm, 'precision', Precision.ANY)
    constraint = getattr(
        dttm, 'precision_constraint', PrecisionConstraint.EXACT,
    )

    # All six microsecond digits, and the same with trailing zeros removed
    # (which is the empty string when there are no microseconds at all).
    all_digits = "{:06d}".format(utc_dttm.microsecond)
    significant_digits = all_digits.rstrip("0")

    if precision == Precision.ANY:
        # No need to truncate; the constraint is ignored
        fraction = significant_digits
    elif precision == Precision.SECOND:
        if constraint == PrecisionConstraint.MIN:
            # second precision, or better: same as ANY, use every digit
            fraction = significant_digits
        else:
            # exact: ignore microseconds entirely
            fraction = ""
    elif constraint == PrecisionConstraint.EXACT:
        # millisecond precision, exactly: always three digits.  Trailing
        # zeros must be kept here, or precision would be lost.
        fraction = all_digits[:3]
    else:
        # millisecond precision, or better: drop trailing zeros, but pad
        # back out to three digits if that stripped too far.
        fraction = significant_digits.ljust(3, "0")

    if fraction:
        return "{}.{}Z".format(base, fraction)
    return "{}Z".format(base)
210
211
def parse_into_datetime(
    value, precision=Precision.ANY,
    precision_constraint=PrecisionConstraint.EXACT,
):
    """
    Parse a value into a valid STIX timestamp object.  Also, optionally adjust
    precision of fractional seconds.  This allows alignment with JSON
    serialization requirements, and helps ensure we're not using extra
    precision which would be lost upon JSON serialization.  The precision
    info will be embedded in the returned object, so that JSON serialization
    will format it correctly.

    :param value: A datetime.datetime or datetime.date instance, or a
        timestamp string of the form "YYYY-MM-DDTHH:MM:SSZ", optionally with
        fractional seconds.  A date gets a midnight UTC time component; a
        string is interpreted as UTC.
    :param precision: A precision value: either an instance of the Precision
        enum, or a string naming one of the enum values (case-insensitive)
    :param precision_constraint: A precision constraint value: either an
        instance of the PrecisionConstraint enum, or a string naming one of
        the enum values (case-insensitive)
    :return: A STIXdatetime instance, which is a datetime but also carries the
        precision info necessary to properly JSON-serialize it.
    :raises ValueError: If value is neither a date/datetime nor a string in
        a recognized timestamp format
    """
    precision = _to_enum(precision, Precision)
    precision_constraint = _to_enum(precision_constraint, PrecisionConstraint)

    if isinstance(value, dt.date):
        if hasattr(value, 'hour'):
            # Already a full datetime
            ts = value
        else:
            # Add a time component
            ts = dt.datetime.combine(value, dt.time(0, 0, tzinfo=pytz.utc))
    else:
        # value isn't a date or datetime object so assume it's a string
        try:
            # Choosing the format is done inside the try block as well: the
            # "in" test raises TypeError for values such as None or an int,
            # and that must be reported as the ValueError below rather than
            # escaping to the caller.
            fmt = _TIMESTAMP_FORMAT_FRAC if "." in value else _TIMESTAMP_FORMAT
            parsed = dt.datetime.strptime(value, fmt)
        except (TypeError, ValueError):
            # Unknown format
            raise ValueError(
                "must be a datetime object, date object, or "
                "timestamp string in a recognizable format.",
            )
        if parsed.tzinfo:
            ts = parsed.astimezone(pytz.utc)
        else:
            # Doesn't have timezone info in the string; assume UTC
            ts = pytz.utc.localize(parsed)

    # Ensure correct precision
    if precision == Precision.SECOND:
        if precision_constraint == PrecisionConstraint.EXACT:
            ts = ts.replace(microsecond=0)
        # else, no need to modify fractional seconds

    elif precision == Precision.MILLISECOND:
        if precision_constraint == PrecisionConstraint.EXACT:
            # Truncate (don't round) to whole milliseconds
            us = (ts.microsecond // 1000) * 1000
            ts = ts.replace(microsecond=us)
        # else: at least millisecond precision: the constraint will affect JSON
        # formatting, but there's nothing we need to do here.

    # else, precision == Precision.ANY: nothing for us to do.

    return STIXdatetime(
        ts, precision=precision, precision_constraint=precision_constraint,
    )
277
278
def _get_dict(data):
    """Return data as a dictionary.

    A plain dict is returned as-is.  Otherwise the input is tried, in order,
    as a JSON string, as a file-like object containing JSON, and finally as
    anything the dict() constructor accepts.

    Raises ValueError if none of these conversions apply.
    """
    if type(data) is dict:
        return data

    # Each JSON parser signals "wrong kind of input" with a different
    # exception; on that exception, move on to the next strategy.
    json_strategies = (
        (json.loads, TypeError),
        (json.load, AttributeError),
    )
    for parse, wrong_input_error in json_strategies:
        try:
            return parse(data)
        except wrong_input_error:
            continue

    try:
        return dict(data)
    except (ValueError, TypeError):
        raise ValueError("Cannot convert '%s' to dictionary." % str(data))
300
301
def get_class_hierarchy_names(obj):
    """Return the class names in obj's method resolution order.

    The first name is obj's own class; the rest are its ancestors, in the
    order given by the class's __mro__.
    """
    return [klass.__name__ for klass in obj.__class__.__mro__]
308
309
def get_type_from_id(stix_id):
    """Return the part of a STIX ID before its first '--' separator.

    If the ID contains no '--', the whole string is returned.
    """
    type_part, _separator, _remainder = stix_id.partition('--')
    return type_part
312
313
def is_marking(obj_or_id):
    """Tell whether an object, or an object ID, refers to a marking
    definition.

    :param obj_or_id: A STIX object (a stix2 object or a dict) or an object
        ID as a string.
    :return: True if a marking definition, False otherwise.
    """
    if not isinstance(obj_or_id, (stix2.base._STIXBase, dict)):
        # Not an object, so it's a string ID: check its type prefix.
        return obj_or_id.startswith("marking-definition--")

    return obj_or_id["type"] == "marking-definition"
329