"""Utility functions and classes for the STIX2 library."""

import datetime as dt
import enum
import json
import re

import pytz
import six

import stix2

# Sentinel value for properties that should be set to the current time.
# We can't use the standard 'default' approach, since if there are multiple
# timestamps in a single object, the timestamps will vary by a few microseconds.
NOW = object()

PREFIX_21_REGEX = re.compile(r'^[a-z].*')

_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
_TIMESTAMP_FORMAT_FRAC = "%Y-%m-%dT%H:%M:%S.%fZ"


class Precision(enum.Enum):
    """
    Timestamp format precisions.
    """
    # auto() wasn't introduced until Python 3.6.
    ANY = 1
    SECOND = 2
    MILLISECOND = 3


class PrecisionConstraint(enum.Enum):
    """
    Timestamp precision constraints.  These affect how the Precision
    values are applied when formatting a timestamp.

    These constraints don't really make sense with the ANY precision, so they
    have no effect in that case.
    """
    EXACT = 1  # format must have exactly the given precision
    MIN = 2  # format must have at least the given precision
    # no need for a MAX constraint yet


def _to_enum(value, enum_type, enum_default=None):
    """
    Detect and convert strings to enums and None to a default enum.  This
    allows use of strings and None in APIs, while enforcing the enum type: if
    you use a string, it must name a valid enum value.  This implementation is
    case-insensitive.

    :param value: A value to be interpreted as an enum (string, Enum instance,
        or None).  If an Enum instance, it must be an instance of enum_type.
    :param enum_type: The enum type which strings will be interpreted against
    :param enum_default: The default enum to use if value is None.  Must be
        an instance of enum_type, or None.  If None, you are disallowing a
        default and requiring that value be non-None.
    :return: An instance of enum_type
    :raises TypeError: If value was neither an instance of enum_type, None, nor
        a string
    :raises KeyError: If value was a string which couldn't be interpreted as an
        enum value from enum_type
    """
    assert enum_default is None or isinstance(enum_default, enum_type)

    if not isinstance(value, enum_type):
        if value is None and enum_default is not None:
            value = enum_default
        elif isinstance(value, six.string_types):
            # Enum member names are upper-case; upper() makes the lookup
            # case-insensitive.
            value = enum_type[value.upper()]
        else:
            raise TypeError("Not a valid {}: {}".format(
                enum_type.__name__, value,
            ))

    return value


class STIXdatetime(dt.datetime):
    """
    Bundle a datetime with some format-related metadata, so that JSON
    serialization has the info it needs to produce compliant timestamps.
    """

    def __new__(cls, *args, **kwargs):
        """
        Create a STIXdatetime.

        Accepts either the usual datetime constructor arguments, or an
        existing datetime as the first positional argument, whose fields are
        copied.  The keyword arguments "precision" and "precision_constraint"
        (enum instances or case-insensitive names) are consumed here and
        stored on the new instance; they default to Precision.ANY and
        PrecisionConstraint.EXACT.
        """
        precision = _to_enum(
            kwargs.pop("precision", Precision.ANY),
            Precision,
        )
        precision_constraint = _to_enum(
            kwargs.pop("precision_constraint", PrecisionConstraint.EXACT),
            PrecisionConstraint,
        )

        # Check that args is non-empty before indexing it, so keyword-only
        # construction (e.g. STIXdatetime(year=2020, month=1, day=1)) is
        # passed through to datetime instead of raising IndexError.
        if args and isinstance(args[0], dt.datetime):
            # Allow passing in a datetime object
            dttm = args[0]
            args = (
                dttm.year, dttm.month, dttm.day, dttm.hour, dttm.minute,
                dttm.second, dttm.microsecond, dttm.tzinfo,
            )
        # self will be an instance of STIXdatetime, not dt.datetime
        self = dt.datetime.__new__(cls, *args, **kwargs)
        self.precision = precision
        self.precision_constraint = precision_constraint
        return self

    def __repr__(self):
        """Return the STIX timestamp string, wrapped in single quotes."""
        return "'%s'" % format_datetime(self)


def deduplicate(stix_obj_list):
    """Deduplicate a list of STIX objects to a unique set.

    Reduces a set of STIX objects to a unique set by looking
    at the 'id' and 'modified' fields - as a unique object version
    is determined by the combination of those fields.  If an object has
    no (truthy) 'modified' value, 'created' is used in its place; if it has
    neither, the object is keyed on 'id' alone.

    Note: Be aware, as can be seen in the implementation
    of deduplicate(), that if the "stix_obj_list" argument has
    multiple STIX objects of the same version, the last object
    version found in the list will be the one that is returned.

    Args:
        stix_obj_list (list): list of STIX objects (dicts)

    Returns:
        A list with a unique set of the passed list of STIX objects.

    """
    unique_objs = {}

    for obj in stix_obj_list:
        ver = obj.get("modified") or obj.get("created")

        if ver is None:
            unique_objs[obj["id"]] = obj
        else:
            unique_objs[(obj['id'], ver)] = obj

    return list(unique_objs.values())


def get_timestamp():
    """Return a STIX timestamp of the current date and time."""
    return STIXdatetime.now(tz=pytz.UTC)


def format_datetime(dttm):
    """Convert a datetime object into a valid STIX timestamp string.

    1. Convert to timezone-aware
    2. Convert to UTC
    3. Format in ISO format
    4. Ensure correct precision
       a. Add subsecond value if warranted, according to precision settings
    5. Add "Z"

    The precision settings are read from the "precision" and
    "precision_constraint" attributes of dttm when it has them; otherwise
    Precision.ANY and PrecisionConstraint.EXACT are used.

    """

    if dttm.tzinfo is None or dttm.tzinfo.utcoffset(dttm) is None:
        # dttm is timezone-naive; assume UTC
        zoned = pytz.utc.localize(dttm)
    else:
        zoned = dttm.astimezone(pytz.utc)
    ts = zoned.strftime('%Y-%m-%dT%H:%M:%S')
    precision = getattr(dttm, 'precision', Precision.ANY)
    precision_constraint = getattr(
        dttm, 'precision_constraint', PrecisionConstraint.EXACT,
    )

    # Zero-padded, six-digit microseconds; each branch below trims this.
    micros = "{:06d}".format(zoned.microsecond)

    frac_seconds_str = ""
    if precision == Precision.ANY:
        # No need to truncate; ignore constraint
        if zoned.microsecond:
            frac_seconds_str = micros.rstrip("0")

    elif precision == Precision.SECOND:
        if precision_constraint == PrecisionConstraint.MIN:
            # second precision, or better.  Winds up being the same as ANY:
            # just use all our digits
            if zoned.microsecond:
                frac_seconds_str = micros.rstrip("0")
        # exact: ignore microseconds entirely

    else:
        # precision == millisecond
        if precision_constraint == PrecisionConstraint.EXACT:
            # can't rstrip() here or we may lose precision
            frac_seconds_str = micros[:3]

        else:
            # millisecond precision, or better.  So we can rstrip() zeros, but
            # only to a length of at least 3 digits (ljust() adds zeros back,
            # if it stripped too far.)
            frac_seconds_str = micros.rstrip("0").ljust(3, "0")

    ts = "{}{}{}Z".format(
        ts,
        "." if frac_seconds_str else "",
        frac_seconds_str,
    )

    return ts


def parse_into_datetime(
    value, precision=Precision.ANY,
    precision_constraint=PrecisionConstraint.EXACT,
):
    """
    Parse a value into a valid STIX timestamp object.  Also, optionally adjust
    precision of fractional seconds.  This allows alignment with JSON
    serialization requirements, and helps ensure we're not using extra
    precision which would be lost upon JSON serialization.  The precision
    info will be embedded in the returned object, so that JSON serialization
    will format it correctly.

    :param value: A datetime.datetime or datetime.date instance, or a string
    :param precision: A precision value: either an instance of the Precision
        enum, or a string naming one of the enum values (case-insensitive)
    :param precision_constraint: A precision constraint value: either an
        instance of the PrecisionConstraint enum, or a string naming one of
        the enum values (case-insensitive)
    :return: A STIXdatetime instance, which is a datetime but also carries the
        precision info necessary to properly JSON-serialize it.
    :raises ValueError: If value is not a date/datetime and cannot be parsed
        as a timestamp string in a recognized format
    """
    precision = _to_enum(precision, Precision)
    precision_constraint = _to_enum(precision_constraint, PrecisionConstraint)

    if isinstance(value, dt.date):
        if hasattr(value, 'hour'):
            ts = value
        else:
            # Add a time component
            ts = dt.datetime.combine(value, dt.time(0, 0, tzinfo=pytz.utc))
    else:
        # value isn't a date or datetime object so assume it's a string
        try:
            # The "." membership test is inside the try on purpose: for a
            # non-iterable value (e.g. an int) it raises TypeError, which
            # must be reported as the ValueError below like any other
            # unparseable input.
            if "." in value:
                fmt = _TIMESTAMP_FORMAT_FRAC
            else:
                fmt = _TIMESTAMP_FORMAT
            parsed = dt.datetime.strptime(value, fmt)
        except (TypeError, ValueError):
            # Unknown format
            raise ValueError(
                "must be a datetime object, date object, or "
                "timestamp string in a recognizable format.",
            )
        if parsed.tzinfo:
            ts = parsed.astimezone(pytz.utc)
        else:
            # Doesn't have timezone info in the string; assume UTC
            ts = pytz.utc.localize(parsed)

    # Ensure correct precision
    if precision == Precision.SECOND:
        if precision_constraint == PrecisionConstraint.EXACT:
            ts = ts.replace(microsecond=0)
        # else, no need to modify fractional seconds

    elif precision == Precision.MILLISECOND:
        if precision_constraint == PrecisionConstraint.EXACT:
            # Truncate (not round) to whole milliseconds
            us = (ts.microsecond // 1000) * 1000
            ts = ts.replace(microsecond=us)
        # else: at least millisecond precision: the constraint will affect JSON
        # formatting, but there's nothing we need to do here.

    # else, precision == Precision.ANY: nothing for us to do.

    return STIXdatetime(
        ts, precision=precision, precision_constraint=precision_constraint,
    )


def _get_dict(data):
    """Return data as a dictionary.

    Input can be a dictionary, string, or file-like object.

    The conversions are attempted in order: json.loads() (strings),
    json.load() (file-like objects), then dict() (anything else dict() can
    consume).

    :raises ValueError: If none of the conversions apply to data
    """

    # Exact type check (not isinstance): only plain dicts are returned as-is;
    # dict subclasses fall through to the conversions below.
    if type(data) is dict:
        return data

    try:
        return json.loads(data)
    except TypeError:
        pass
    try:
        return json.load(data)
    except AttributeError:
        pass
    try:
        return dict(data)
    except (ValueError, TypeError):
        raise ValueError("Cannot convert '%s' to dictionary." % str(data))


def get_class_hierarchy_names(obj):
    """Given an object, return the names of the class hierarchy."""
    return [cls.__name__ for cls in obj.__class__.__mro__]


def get_type_from_id(stix_id):
    """Return the part of a STIX ID string before its first '--'.

    If the string contains no '--', it is returned whole.
    """
    return stix_id.split('--', 1)[0]


def is_marking(obj_or_id):
    """Determines whether the given object or object ID is/is for a marking
    definition.

    :param obj_or_id: A STIX object or object ID as a string.
    :return: True if a marking definition, False otherwise.
    """

    if isinstance(obj_or_id, (stix2.base._STIXBase, dict)):
        result = obj_or_id["type"] == "marking-definition"
    else:
        # it's a string ID
        result = obj_or_id.startswith("marking-definition--")

    return result