"""Python STIX2 TAXIICollection Source/Sink"""

from requests.exceptions import HTTPError

from stix2 import v20, v21
from stix2.base import _STIXBase
from stix2.datastore import (
    DataSink, DataSource, DataSourceError, DataStoreMixin,
)
from stix2.datastore.filters import Filter, FilterSet, apply_common_filters
from stix2.parsing import parse
from stix2.utils import deduplicate

try:
    from taxii2client.exceptions import ValidationError
    _taxii2_client = True
except ImportError:
    # Remember that the optional dependency is missing; the Sink/Source
    # constructors raise ImportError when they are instantiated without it.
    _taxii2_client = False


# Filter properties that TAXIICollectionSource.query() forwards to the TAXII
# server as keyword arguments instead of applying them locally.
TAXII_FILTERS = ['added_after', 'id', 'type', 'version']


class TAXIICollectionStore(DataStoreMixin):
    """Provides an interface to a local/remote TAXII Collection
    of STIX data. TAXIICollectionStore is a wrapper
    around a paired TAXIICollectionSink and TAXIICollectionSource.

    Args:
        collection (taxii2.Collection): TAXII Collection instance
        allow_custom (bool): whether to allow custom STIX content to be
            pushed/retrieved. Defaults to True for TAXIICollectionSource
            side(retrieving data) and False for TAXIICollectionSink
            side(pushing data). However, when parameter is supplied, it will
            be applied to both TAXIICollectionSource/Sink.

    """
    def __init__(self, collection, allow_custom=None):
        if allow_custom is None:
            # No explicit choice: lenient when reading, strict when writing.
            allow_custom_source = True
            allow_custom_sink = False
        else:
            allow_custom_sink = allow_custom_source = allow_custom

        super(TAXIICollectionStore, self).__init__(
            source=TAXIICollectionSource(collection, allow_custom=allow_custom_source),
            sink=TAXIICollectionSink(collection, allow_custom=allow_custom_sink),
        )


class TAXIICollectionSink(DataSink):
    """Provides an interface for pushing STIX objects to a local/remote
    TAXII Collection endpoint.

    Args:
        collection (taxii2.Collection): TAXII2 Collection instance
        allow_custom (bool): Whether to allow custom STIX content to be
            added to the TAXIICollectionSink. Default: False

    Raises:
        ImportError: if the taxii2client library is not installed.
        DataSourceError: if the collection is not writable, or if checking
            its write access fails with an HTTP or validation error.

    """
    def __init__(self, collection, allow_custom=False):
        super(TAXIICollectionSink, self).__init__()
        if not _taxii2_client:
            raise ImportError("taxii2client library is required for usage of TAXIICollectionSink")

        # Only the access check is guarded: reading ``can_write`` is the
        # operation that can surface HTTPError/ValidationError here.
        try:
            writable = collection.can_write
        except (HTTPError, ValidationError) as e:
            raise DataSourceError(
                "The underlying TAXII Collection resource defined in the supplied TAXII"
                " Collection object provided could not be reached. Receved error:", e,
            )

        if not writable:
            raise DataSourceError(
                "The TAXII Collection object provided does not have write access"
                " to the underlying linked Collection resource",
            )

        self.collection = collection
        self.allow_custom = allow_custom

    def _serialize_as_bundle(self, stix_obj):
        """Wrap a single non-bundle STIX object (or dict) in a Bundle and
        return the serialized bundle.

        A v21 Bundle is used when the object carries a "spec_version"
        property, otherwise a v20 Bundle.
        """
        if 'spec_version' in stix_obj:
            bundle_cls = v21.Bundle
        else:
            bundle_cls = v20.Bundle
        wrapped = bundle_cls(stix_obj, allow_custom=self.allow_custom)
        return wrapped.serialize(encoding='utf-8', ensure_ascii=False)

    def add(self, stix_data, version=None):
        """Add/push STIX content to TAXII Collection endpoint

        Args:
            stix_data (STIX object OR dict OR str OR list): valid STIX2
                content in a STIX object (or Bundle), STIX object dict (or
                Bundle dict), or a STIX2 json encoded string, or list of
                any of the following.
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.

        Raises:
            TypeError: if stix_data is none of the supported types.

        """
        if isinstance(stix_data, list):
            # adding list of something - recurse on each
            for obj in stix_data:
                self.add(obj, version=version)
            return

        if isinstance(stix_data, str):
            # adding json encoded string of STIX content
            stix_data = parse(stix_data, allow_custom=self.allow_custom, version=version)
        elif not isinstance(stix_data, (_STIXBase, dict)):
            raise TypeError("stix_data must be as STIX object(or list of),json formatted STIX (or list of), or a json formatted STIX bundle")

        if stix_data['type'] == 'bundle':
            if isinstance(stix_data, dict):
                # a plain bundle dict has to become a Bundle object before
                # it can be serialized
                stix_data = parse(stix_data, allow_custom=self.allow_custom, version=version)
            bundle = stix_data.serialize(encoding='utf-8', ensure_ascii=False)
        else:
            bundle = self._serialize_as_bundle(stix_data)

        self.collection.add_objects(bundle)


class TAXIICollectionSource(DataSource):
    """Provides an interface for searching/retrieving STIX objects
    from a local/remote TAXII Collection endpoint.

    Args:
        collection (taxii2.Collection): TAXII Collection instance
        allow_custom (bool): Whether to allow custom STIX content to be
            retrieved from the TAXIICollectionSource. Default: True

    Raises:
        ImportError: if the taxii2client library is not installed.
        DataSourceError: if the collection is not readable, or if checking
            its read access fails with an HTTP or validation error.

    """
    def __init__(self, collection, allow_custom=True):
        super(TAXIICollectionSource, self).__init__()
        if not _taxii2_client:
            raise ImportError("taxii2client library is required for usage of TAXIICollectionSource")

        # Only the access check is guarded: reading ``can_read`` is the
        # operation that can surface HTTPError/ValidationError here.
        try:
            readable = collection.can_read
        except (HTTPError, ValidationError) as e:
            raise DataSourceError(
                "The underlying TAXII Collection resource defined in the supplied TAXII"
                " Collection object provided could not be reached. Recieved error:", e,
            )

        if not readable:
            raise DataSourceError(
                "The TAXII Collection object provided does not have read access"
                " to the underlying linked Collection resource",
            )

        self.collection = collection
        self.allow_custom = allow_custom

    def get(self, stix_id, version=None, _composite_filters=None):
        """Retrieve STIX object from local/remote STIX Collection
        endpoint.

        Args:
            stix_id (str): The STIX ID of the STIX object to be retrieved.
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.
            _composite_filters (FilterSet): collection of filters passed from
                the parent CompositeDataSource, not user supplied

        Returns:
            (STIX object): STIX object that has the supplied STIX ID, or
                None when nothing matches. The STIX object is received from
                TAXII as a dict, parsed into a python STIX object and then
                returned.

        Raises:
            DataSourceError: if the TAXII server answers with an HTTP error
                other than 404.

        """
        # combine all query filters
        query = FilterSet()

        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        # don't extract TAXII filters from query (to send to TAXII endpoint)
        # as directly retrieving a STIX object by ID
        try:
            # a response without an "objects" key is treated as "no objects"
            stix_objs = self.collection.get_object(stix_id).get('objects', [])
            matches = list(apply_common_filters(stix_objs, query))
        except HTTPError as e:
            if e.response.status_code != 404:
                raise DataSourceError("TAXII Collection resource returned error", e)
            # resource not found or access denied by the TAXII server
            matches = []

        if not matches:
            return None

        stix_obj = parse(matches[0], allow_custom=self.allow_custom, version=version)
        if stix_obj['id'] != stix_id:
            # check - was added to handle erroneous TAXII servers
            return None

        return stix_obj

    def all_versions(self, stix_id, version=None, _composite_filters=None):
        """Retrieve STIX object from local/remote TAXII Collection
        endpoint, all versions of it

        Args:
            stix_id (str): The STIX ID of the STIX objects to be retrieved.
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.
            _composite_filters (FilterSet): collection of filters passed from the parent
                CompositeDataSource, not user supplied

        Returns:
            (see query() as all_versions() is just a wrapper)

        """
        # make query in TAXII query format since 'id' is TAXII field
        query = [
            Filter('id', '=', stix_id),
            Filter('version', '=', 'all'),
        ]

        # query() already returns parsed STIX objects; forwarding ``version``
        # makes it parse with the requested spec version in a single pass.
        all_data = self.query(query=query, version=version, _composite_filters=_composite_filters)

        # check - was added to handle erroneous TAXII servers
        return [stix_obj for stix_obj in all_data if stix_obj['id'] == stix_id]

    def query(self, query=None, version=None, _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query

        A "complete query" includes the filters from the query, the filters
        attached to this TAXIICollectionSource, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters)

        Args:
            query (list): list of filters to search on
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.
            _composite_filters (FilterSet): collection of filters passed from
                the CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are received from TAXII as dicts,
                parsed into python STIX objects and then returned.

        Raises:
            DataSourceError: if the TAXII server answers with an HTTP error.

        """
        query = FilterSet(query)

        # combine all query filters
        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        # parse taxii query params (that can be applied remotely)
        taxii_filters = self._parse_taxii_filters(query)

        # taxii2client requires query params as keywords
        taxii_filters_dict = {f.property: f.value for f in taxii_filters}

        # query TAXII collection
        try:
            all_data = self.collection.get_objects(**taxii_filters_dict).get('objects', [])
        except HTTPError as e:
            if e.response.status_code == 404:
                # resources not found or access is denied from TAXII server
                raise DataSourceError(
                    "The requested STIX objects for the TAXII Collection resource defined in"
                    " the supplied TAXII Collection object are either not found or access is"
                    " denied. Received error: ", e,
                )
            # Any other HTTP failure is reported as a datastore error too,
            # rather than falling through with no data to return.
            raise DataSourceError("TAXII Collection resource returned error", e)

        # deduplicate data (before filtering as reduces wasted filtering)
        all_data = deduplicate(all_data)

        # apply local (CompositeDataSource, TAXIICollectionSource and query) filters
        query.remove(taxii_filters)
        all_data = list(apply_common_filters(all_data, query))

        # parse python STIX objects from the STIX object dicts
        return [
            parse(stix_obj_dict, allow_custom=self.allow_custom, version=version)
            for stix_obj_dict in all_data
        ]

    def _parse_taxii_filters(self, query):
        """Parse out TAXII filters that the TAXII server can filter on

        Does not put in TAXII spec format as the TAXII2Client (that we use)
        does this for us.

        Note:
            Currently, the TAXII2Client can handle TAXII filters where the
            filter value is list, as both a comma-separated string or python
            list.

            For instance - "?match[type]=indicator,sighting" can be in a
            filter in any of these formats:

            Filter("type", "<any op>", "indicator,sighting")
            Filter("type", "<any op>", ["indicator", "sighting"])

        Args:
            query (list): list of filters to extract which ones are TAXII
                specific.

        Returns:
            A list of TAXII filters that meet the TAXII filtering parameters.

        """
        return [filter_ for filter_ in query if filter_.property in TAXII_FILTERS]