1"""Python STIX2 TAXIICollection Source/Sink"""
2
3from requests.exceptions import HTTPError
4
5from stix2 import v20, v21
6from stix2.base import _STIXBase
7from stix2.datastore import (
8    DataSink, DataSource, DataSourceError, DataStoreMixin,
9)
10from stix2.datastore.filters import Filter, FilterSet, apply_common_filters
11from stix2.parsing import parse
12from stix2.utils import deduplicate
13
14try:
15    from taxii2client.exceptions import ValidationError
16    _taxii2_client = True
17except ImportError:
18    _taxii2_client = False
19
20
# Filter property names that TAXIICollectionSource._parse_taxii_filters()
# pulls out of a query; query() then passes those filters to the collection's
# get_objects() call as keyword arguments instead of applying them locally.
TAXII_FILTERS = ['added_after', 'id', 'type', 'version']
22
23
class TAXIICollectionStore(DataStoreMixin):
    """Pairs a TAXIICollectionSource and a TAXIICollectionSink built over
    the same TAXII Collection and exposes them through DataStoreMixin.

    Args:
        collection (taxii2.Collection): TAXII Collection instance
        allow_custom (bool): whether custom STIX content may be
            pushed/retrieved. When left as None, the source side
            (retrieving data) allows custom content and the sink side
            (pushing data) does not. Any explicitly supplied value is
            applied to both the source and the sink.

    """
    def __init__(self, collection, allow_custom=None):
        # None selects the per-side defaults; any other value is used
        # as-is for both sides.
        use_defaults = allow_custom is None
        source_allows_custom = True if use_defaults else allow_custom
        sink_allows_custom = False if use_defaults else allow_custom

        taxii_source = TAXIICollectionSource(collection, allow_custom=source_allows_custom)
        taxii_sink = TAXIICollectionSink(collection, allow_custom=sink_allows_custom)

        super(TAXIICollectionStore, self).__init__(source=taxii_source, sink=taxii_sink)
49
50
class TAXIICollectionSink(DataSink):
    """Provides an interface for pushing STIX objects to a local/remote
    TAXII Collection endpoint.

    Args:
        collection (taxii2.Collection): TAXII2 Collection instance
        allow_custom (bool): Whether to allow custom STIX content to be
            added to the TAXIICollectionSink. Default: False

    Raises:
        ImportError: if the taxii2client library could not be imported.
        DataSourceError: if the collection reports no write access, or if
            reading its write-access flag raised an HTTPError or
            ValidationError.

    """
    def __init__(self, collection, allow_custom=False):
        super(TAXIICollectionSink, self).__init__()
        if not _taxii2_client:
            raise ImportError("taxii2client library is required for usage of TAXIICollectionSink")

        # Keep the guarded region minimal: only reading the access flag is
        # translated into a DataSourceError.
        try:
            can_write = collection.can_write
        except (HTTPError, ValidationError) as e:
            raise DataSourceError(
                "The underlying TAXII Collection resource defined in the supplied TAXII"
                " Collection object provided could not be reached. Receved error:", e,
            )

        if not can_write:
            raise DataSourceError(
                "The TAXII Collection object provided does not have write access"
                " to the underlying linked Collection resource",
            )

        self.collection = collection
        self.allow_custom = allow_custom

    def add(self, stix_data, version=None):
        """Add/push STIX content to TAXII Collection endpoint

        Args:
            stix_data (STIX object OR dict OR str OR list): valid STIX2
                content in a STIX object (or Bundle), STIX object dict (or
                Bundle dict), or a STIX2 json encoded string, or list of
                any of the preceding.
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.

        Raises:
            TypeError: if stix_data is none of the supported input types.

        """
        if isinstance(stix_data, list):
            # adding list of something - recurse on each; every element is
            # pushed to the collection by its own add() call
            for obj in stix_data:
                self.add(obj, version=version)
            return

        if isinstance(stix_data, str):
            # json encoded string of STIX content - parse it first
            stix_data = parse(stix_data, allow_custom=self.allow_custom, version=version)
        elif isinstance(stix_data, dict):
            # a Bundle dict has to become a Bundle object before it can be
            # serialized; a plain object dict is wrapped as-is by the helper
            if stix_data['type'] == 'bundle':
                stix_data = parse(stix_data, allow_custom=self.allow_custom, version=version)
        elif not isinstance(stix_data, _STIXBase):
            raise TypeError("stix_data must be as STIX object(or list of),json formatted STIX (or list of), or a json formatted STIX bundle")

        self.collection.add_objects(self._serialize_as_bundle(stix_data))

    def _serialize_as_bundle(self, stix_obj):
        """Return stix_obj serialized as a bundle, wrapping it in one if needed."""
        if stix_obj['type'] == 'bundle':
            return stix_obj.serialize(encoding='utf-8', ensure_ascii=False)

        # If the spec_version is present, use the new (v21) Bundle object,
        # otherwise fall back to the v20 Bundle.
        bundle_cls = v21.Bundle if 'spec_version' in stix_obj else v20.Bundle
        wrapped = bundle_cls(stix_obj, allow_custom=self.allow_custom)
        return wrapped.serialize(encoding='utf-8', ensure_ascii=False)
137
138
class TAXIICollectionSource(DataSource):
    """Provides an interface for searching/retrieving STIX objects
    from a local/remote TAXII Collection endpoint.

    Args:
        collection (taxii2.Collection): TAXII Collection instance
        allow_custom (bool): Whether to allow custom STIX content to be
            retrieved from the TAXIICollectionSource. Default: True

    Raises:
        ImportError: if the taxii2client library could not be imported.
        DataSourceError: if the collection reports no read access, or if
            reading its read-access flag raised an HTTPError or
            ValidationError.

    """
    def __init__(self, collection, allow_custom=True):
        super(TAXIICollectionSource, self).__init__()
        if not _taxii2_client:
            raise ImportError("taxii2client library is required for usage of TAXIICollectionSource")

        # Keep the guarded region minimal: only reading the access flag is
        # translated into a DataSourceError.
        try:
            can_read = collection.can_read
        except (HTTPError, ValidationError) as e:
            raise DataSourceError(
                "The underlying TAXII Collection resource defined in the supplied TAXII"
                " Collection object provided could not be reached. Recieved error:", e,
            )

        if not can_read:
            raise DataSourceError(
                "The TAXII Collection object provided does not have read access"
                " to the underlying linked Collection resource",
            )

        self.collection = collection
        self.allow_custom = allow_custom

    def get(self, stix_id, version=None, _composite_filters=None):
        """Retrieve STIX object from local/remote STIX Collection
        endpoint.

        Args:
            stix_id (str): The STIX ID of the STIX object to be retrieved.
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.
            _composite_filters (FilterSet): collection of filters passed from
                the parent CompositeDataSource, not user supplied

        Returns:
            (STIX object): STIX object that has the supplied STIX ID, or
                None if the server answered 404, nothing passed the
                filters, or the first matching object carries a different
                ID. The STIX object is received from TAXII as a dict,
                parsed into a python STIX object and then returned.

        Raises:
            DataSourceError: if the TAXII server answered with an HTTP error
                other than 404.

        """
        # combine all query filters
        query = FilterSet()

        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        # don't extract TAXII filters from query (to send to TAXII endpoint)
        # as directly retrieving a STIX object by ID
        try:
            # a response without an "objects" key is treated as empty
            # rather than failing with a KeyError
            stix_objs = self.collection.get_object(stix_id).get('objects', [])
        except HTTPError as e:
            if getattr(e.response, 'status_code', None) == 404:
                # resource not found or access is denied from TAXII server
                return None
            raise DataSourceError("TAXII Collection resource returned error", e)

        matches = list(apply_common_filters(stix_objs, query))
        if not matches:
            return None

        stix_obj = parse(matches[0], allow_custom=self.allow_custom, version=version)
        if stix_obj['id'] != stix_id:
            # check - was added to handle erroneous TAXII servers
            return None

        return stix_obj

    def all_versions(self, stix_id, version=None, _composite_filters=None):
        """Retrieve STIX object from local/remote TAXII Collection
        endpoint, all versions of it

        Args:
            stix_id (str): The STIX ID of the STIX objects to be retrieved.
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.
            _composite_filters (FilterSet): collection of filters passed from the parent
                CompositeDataSource, not user supplied

        Returns:
            (list): the objects returned by query() whose ID equals
                stix_id (all_versions() is a thin wrapper around query()).

        """
        # make query in TAXII query format since 'id' is TAXII field
        query = [
            Filter('id', '=', stix_id),
            Filter('version', '=', 'all'),
        ]

        # query() already parses the returned dicts into python STIX objects;
        # forwarding `version` lets it honour the requested spec version so
        # the results do not have to be parsed a second time here.
        all_data = self.query(query=query, version=version, _composite_filters=_composite_filters)

        # check - was added to handle erroneous TAXII servers
        return [stix_obj for stix_obj in all_data if stix_obj['id'] == stix_id]

    def query(self, query=None, version=None, _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query

        A "complete query" includes the filters from the query, the filters
        attached to this TAXIICollectionSource, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters)

        Args:
            query (list): list of filters to search on
            version (str): If present, it forces the parser to use the version
                provided. Otherwise, the library will make the best effort based
                on checking the "spec_version" property.
            _composite_filters (FilterSet): collection of filters passed from
                the CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are received from TAXII as dicts,
                parsed into python STIX objects and then returned.

        Raises:
            DataSourceError: if the TAXII server answered with an HTTP error
                (a 404 gets a dedicated "not found or access denied"
                message).

        """
        query = FilterSet(query)

        # combine all query filters
        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        # parse taxii query params (that can be applied remotely)
        taxii_filters = self._parse_taxii_filters(query)

        # taxii2client requires query params as keywords
        taxii_filters_dict = {f.property: f.value for f in taxii_filters}

        # query TAXII collection
        try:
            all_data = self.collection.get_objects(**taxii_filters_dict).get('objects', [])
        except HTTPError as e:
            # resources not found or access is denied from TAXII server
            if getattr(e.response, 'status_code', None) == 404:
                raise DataSourceError(
                    "The requested STIX objects for the TAXII Collection resource defined in"
                    " the supplied TAXII Collection object are either not found or access is"
                    " denied. Received error: ", e,
                )
            # Any other HTTP failure must surface too; letting it fall
            # through would leave `all_data` unbound further down.
            raise DataSourceError("TAXII Collection resource returned error", e)

        # deduplicate data (before filtering as reduces wasted filtering)
        all_data = deduplicate(all_data)

        # apply local (CompositeDataSource, TAXIICollectionSource and query)
        # filters; the TAXII filters were already sent with the request
        query.remove(taxii_filters)
        all_data = list(apply_common_filters(all_data, query))

        # parse python STIX objects from the STIX object dicts
        return [
            parse(stix_obj_dict, allow_custom=self.allow_custom, version=version)
            for stix_obj_dict in all_data
        ]

    def _parse_taxii_filters(self, query):
        """Parse out TAXII filters that the TAXII server can filter on

        Does not put in TAXII spec format as the TAXII2Client (that we use)
        does this for us.

        Note:
            Currently, the TAXII2Client can handle TAXII filters where the
            filter value is list, as both a comma-separated string or python
            list.

            For instance - "?match[type]=indicator,sighting" can be in a
            filter in any of these formats:

            Filter("type", "<any op>", "indicator,sighting")
            Filter("type", "<any op>", ["indicator", "sighting"])

        Args:
            query (list): list of filters to extract which ones are TAXII
                specific.

        Returns:
            A list of TAXII filters that meet the TAXII filtering parameters.

        """
        # selection is by property name only; the filter operator is not
        # inspected here
        return [filter_ for filter_ in query if filter_.property in TAXII_FILTERS]
345