1# Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License"). You
4# may not use this file except in compliance with the License. A copy of
5# the License is located at
6#
7# http://aws.amazon.com/apache2.0/
8#
9# or in the "license" file accompanying this file. This file is
10# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11# ANY KIND, either express or implied. See the License for the specific
12# language governing permissions and limitations under the License.
13"""Response parsers for the various protocol types.
14
15The module contains classes that can take an HTTP response, and given
16an output shape, parse the response into a dict according to the
17rules in the output shape.
18
19There are many similarities amongst the different protocols with regard
20to response parsing, and the code is structured in a way to avoid
code duplication when possible.  The diagram below shows the
inheritance hierarchy of the response classes.
23
24::
25
26
27
28                                 +--------------+
29                                 |ResponseParser|
30                                 +--------------+
31                                    ^    ^    ^
32               +--------------------+    |    +-------------------+
33               |                         |                        |
34    +----------+----------+       +------+-------+        +-------+------+
35    |BaseXMLResponseParser|       |BaseRestParser|        |BaseJSONParser|
36    +---------------------+       +--------------+        +--------------+
37              ^         ^          ^           ^           ^        ^
38              |         |          |           |           |        |
39              |         |          |           |           |        |
40              |        ++----------+-+       +-+-----------++       |
41              |        |RestXMLParser|       |RestJSONParser|       |
42        +-----+-----+  +-------------+       +--------------+  +----+-----+
43        |QueryParser|                                          |JSONParser|
44        +-----------+                                          +----------+
45
46
47The diagram above shows that there is a base class, ``ResponseParser`` that
48contains logic that is similar amongst all the different protocols (``query``,
49``json``, ``rest-json``, ``rest-xml``).  Amongst the various services there
50is shared logic that can be grouped several ways:
51
52* The ``query`` and ``rest-xml`` both have XML bodies that are parsed in the
53  same way.
54* The ``json`` and ``rest-json`` protocols both have JSON bodies that are
55  parsed in the same way.
56* The ``rest-json`` and ``rest-xml`` protocols have additional attributes
57  besides body parameters that are parsed the same (headers, query string,
58  status code).
59
60This is reflected in the class diagram above.  The ``BaseXMLResponseParser``
61and the BaseJSONParser contain logic for parsing the XML/JSON body,
62and the BaseRestParser contains logic for parsing out attributes that
63come from other parts of the HTTP response.  Classes like the
64``RestXMLParser`` inherit from the ``BaseXMLResponseParser`` to get the
65XML body parsing logic and the ``BaseRestParser`` to get the HTTP
66header/status code/query string parsing.
67
68Additionally, there are event stream parsers that are used by the other parsers
69to wrap streaming bodies that represent a stream of events. The
70BaseEventStreamParser extends from ResponseParser and defines the logic for
71parsing values from the headers and payload of a message from the underlying
72binary encoding protocol. Currently, event streams support parsing bodies
73encoded as JSON and XML through the following hierarchy.
74
75
76                                  +--------------+
77                                  |ResponseParser|
78                                  +--------------+
79                                    ^    ^    ^
80               +--------------------+    |    +------------------+
81               |                         |                       |
82    +----------+----------+   +----------+----------+    +-------+------+
83    |BaseXMLResponseParser|   |BaseEventStreamParser|    |BaseJSONParser|
84    +---------------------+   +---------------------+    +--------------+
85                     ^                ^        ^                 ^
86                     |                |        |                 |
87                     |                |        |                 |
88                   +-+----------------+-+    +-+-----------------+-+
89                   |EventStreamXMLParser|    |EventStreamJSONParser|
90                   +--------------------+    +---------------------+
91
92Return Values
93=============
94
Each call to ``parse()`` returns a dict that has this form::
96
97    Standard Response
98
99    {
100      "ResponseMetadata": {"RequestId": <requestid>}
101      <response keys>
102    }
103
104    Error response
105
106    {
107      "ResponseMetadata": {"RequestId": <requestid>}
108      "Error": {
109        "Code": <string>,
110        "Message": <string>,
111        "Type": <string>,
112        <additional keys>
113      }
114    }
115
116"""
import base64
import functools
import json
import logging
import re

from botocore.compat import six, ETree, XMLParseError
from botocore.eventstream import EventStream, NoInitialResponseError

from botocore.utils import parse_timestamp, merge_dicts, \
    is_json_value_header, lowercase_dict
127
128LOG = logging.getLogger(__name__)
129
130DEFAULT_TIMESTAMP_PARSER = parse_timestamp
131
132
class ResponseParserFactory(object):
    """Factory that builds protocol parsers with shared default kwargs."""

    def __init__(self):
        # Keyword arguments passed to every parser this factory creates.
        self._defaults = {}

    def set_parser_defaults(self, **kwargs):
        """Set default arguments when a parser instance is created.

        You can specify any kwargs that are allowed by a ResponseParser
        class.  There are currently two arguments:

            * timestamp_parser - A callable that can parse a timestamp string
            * blob_parser - A callable that can parse a blob type

        Later calls override earlier values for the same keyword.

        """
        for option_name, option_value in kwargs.items():
            self._defaults[option_name] = option_value

    def create_parser(self, protocol_name):
        """Instantiate the parser registered for ``protocol_name``.

        The class is looked up in ``PROTOCOL_PARSERS`` and constructed
        with the defaults accumulated via ``set_parser_defaults``.
        """
        return PROTOCOL_PARSERS[protocol_name](**self._defaults)
152
153
def create_parser(protocol):
    """Create a parser for ``protocol`` using a fresh, default factory."""
    factory = ResponseParserFactory()
    return factory.create_parser(protocol)
156
157
158def _text_content(func):
159    # This decorator hides the difference between
160    # an XML node with text or a plain string.  It's used
161    # to ensure that scalar processing operates only on text
162    # strings, which allows the same scalar handlers to be used
163    # for XML nodes from the body and HTTP headers.
164    def _get_text_content(self, shape, node_or_string):
165        if hasattr(node_or_string, 'text'):
166            text = node_or_string.text
167            if text is None:
168                # If an XML node is empty <foo></foo>,
169                # we want to parse that as an empty string,
170                # not as a null/None value.
171                text = ''
172        else:
173            text = node_or_string
174        return func(self, shape, text)
175    return _get_text_content
176
177
class ResponseParserError(Exception):
    """Raised by the parsers in this module when a response is invalid."""
180
181
class ResponseParser(object):
    """Base class for response parsing.

    This class represents the interface that all ResponseParsers for the
    various protocols must implement.

    This class will take an HTTP response and a model shape and parse the
    HTTP response into a dictionary.

    There is a single public method exposed: ``parse``.  See the ``parse``
    docstring for more info.

    Subclasses are expected to provide ``_do_parse``, ``_do_error_parse``
    and ``_do_modeled_error_parse``; the versions defined here raise
    ``NotImplementedError``.

    """
    DEFAULT_ENCODING = 'utf-8'
    # When a subclass sets this to a parser class, an instance of it is
    # created in ``__init__`` and handed to every ``EventStream`` built by
    # ``_create_event_stream``.
    EVENT_STREAM_PARSER_CLS = None

    def __init__(self, timestamp_parser=None, blob_parser=None):
        """Create a parser.

        :param timestamp_parser: Callable used to parse timestamp values.
            Defaults to ``DEFAULT_TIMESTAMP_PARSER``.
        :param blob_parser: Callable used to parse blob values.  Defaults
            to base64 decoding (``_default_blob_parser``).
        """
        if timestamp_parser is None:
            timestamp_parser = DEFAULT_TIMESTAMP_PARSER
        self._timestamp_parser = timestamp_parser
        if blob_parser is None:
            blob_parser = self._default_blob_parser
        self._blob_parser = blob_parser
        self._event_stream_parser = None
        if self.EVENT_STREAM_PARSER_CLS is not None:
            # The event stream parser shares the resolved scalar parsers.
            self._event_stream_parser = self.EVENT_STREAM_PARSER_CLS(
                timestamp_parser, blob_parser)

    def _default_blob_parser(self, value):
        """Base64-decode ``value`` and return the resulting bytes."""
        # Blobs are always returned as bytes type (this matters on python3).
        # We don't decode this to a str because it's entirely possible that the
        # blob contains binary data that actually can't be decoded.
        return base64.b64decode(value)

    def parse(self, response, shape):
        """Parse the HTTP response given a shape.

        :param response: The HTTP response dictionary.  This is a dictionary
            that represents the HTTP response.  The dictionary must have the
            following keys, ``body``, ``headers``, and ``status_code``.

        :param shape: The model shape describing the expected output.
        :return: Returns a dictionary representing the parsed response
            described by the model.  Modeled errors and event stream
            responses are returned undecorated.  Any other dict result
            also carries a ``ResponseMetadata`` entry which contains at
            least ``HTTPStatusCode`` and ``HTTPHeaders``.  Protocol
            specific parsing may populate additional keys such as
            ``RequestId``.

        """
        LOG.debug('Response headers: %s', response['headers'])
        LOG.debug('Response body:\n%s', response['body'])
        # Status codes of 301 and above are routed to the error parsers.
        if response['status_code'] >= 301:
            if self._is_generic_error_response(response):
                parsed = self._do_generic_error_parse(response)
            elif self._is_modeled_error_shape(shape):
                parsed = self._do_modeled_error_parse(response, shape)
                # We don't want to decorate the modeled fields with metadata
                return parsed
            else:
                parsed = self._do_error_parse(response, shape)
        else:
            parsed = self._do_parse(response, shape)

        # We don't want to decorate event stream responses with metadata
        if shape and shape.serialization.get('eventstream'):
            return parsed

        # Add ResponseMetadata if it doesn't exist and inject the HTTP
        # status code and headers from the response.
        if isinstance(parsed, dict):
            response_metadata = parsed.get('ResponseMetadata', {})
            response_metadata['HTTPStatusCode'] = response['status_code']
            # Ensure that the http header keys are all lower cased. Older
            # versions of urllib3 (< 1.11) would unintentionally do this for us
            # (see urllib3#633). We need to do this conversion manually now.
            headers = response['headers']
            response_metadata['HTTPHeaders'] = lowercase_dict(headers)
            parsed['ResponseMetadata'] = response_metadata
        return parsed

    def _is_modeled_error_shape(self, shape):
        """Return a truthy value if ``shape`` is marked as an exception."""
        return shape is not None and shape.metadata.get('exception', False)

    def _is_generic_error_response(self, response):
        """Return True if ``response`` looks like a non-protocol error.

        Only responses with a status code of 500 or above are considered.
        Such a response is generic when its body is missing, ``None``,
        empty/whitespace only, or starts with ``<html>``.
        """
        # There are times when a service will respond with a generic
        # error response such as:
        # '<html><body><b>Http/1.1 Service Unavailable</b></body></html>'
        #
        # This can also happen if you're going through a proxy.
        # In this case the protocol specific _do_error_parse will either
        # fail to parse the response (in the best case) or silently succeed
        # and treat the HTML above as an XML response and return
        # non sensical parsed data.
        # To prevent this case from happening we first need to check
        # whether or not this response looks like the generic response.
        if response['status_code'] < 500:
            return False
        if 'body' not in response or response['body'] is None:
            return True

        body = response['body'].strip()
        return body.startswith(b'<html>') or not body

    def _do_generic_error_parse(self, response):
        """Build an error dict from the status code alone.

        ``Code`` is the status code as a string and ``Message`` is the
        standard HTTP reason phrase for it (empty if unknown).
        """
        # There's not really much we can do when we get a generic
        # html response.
        LOG.debug("Received a non protocol specific error response from the "
                  "service, unable to populate error code and message.")
        return {
            'Error': {'Code': str(response['status_code']),
                      'Message': six.moves.http_client.responses.get(
                          response['status_code'], '')},
            'ResponseMetadata': {},
        }

    def _do_parse(self, response, shape):
        """Parse a non-error response; must be provided by subclasses."""
        raise NotImplementedError("%s._do_parse" % self.__class__.__name__)

    def _do_error_parse(self, response, shape):
        """Parse an error response; must be provided by subclasses."""
        raise NotImplementedError(
            "%s._do_error_parse" % self.__class__.__name__)

    def _do_modeled_error_parse(self, response, shape, parsed=None):
        """Parse a modeled error; must be provided by subclasses.

        ``parsed`` is optional so that this base signature accepts the
        ``(response, shape)`` call made by ``parse``; without the default
        that call would fail with ``TypeError`` rather than the intended
        ``NotImplementedError``.
        """
        raise NotImplementedError(
            "%s._do_modeled_error_parse" % self.__class__.__name__)

    def _parse_shape(self, shape, node):
        """Dispatch to ``_handle_<type_name>`` for ``shape``.

        Falls back to ``_default_handle`` when no specific handler exists.
        """
        handler = getattr(self, '_handle_%s' % shape.type_name,
                          self._default_handle)
        return handler(shape, node)

    def _handle_list(self, shape, node):
        """Parse every item of ``node`` with the list's member shape."""
        # Enough implementations share list serialization that it's moved
        # up here in the base class.
        member_shape = shape.member
        return [self._parse_shape(member_shape, item) for item in node]

    def _default_handle(self, shape, value):
        """Return ``value`` unchanged."""
        return value

    def _create_event_stream(self, response, shape):
        """Wrap the response body in an ``EventStream``.

        The operation name is read from ``response['context']``.
        """
        parser = self._event_stream_parser
        name = response['context'].get('operation_name')
        return EventStream(response['body'], shape, parser, name)

    def _get_first_key(self, value):
        """Return the first key (or item) obtained by iterating ``value``."""
        return list(value)[0]

    def _has_unknown_tagged_union_member(self, shape, value):
        """Return True if ``value`` holds a union member unknown to ``shape``.

        :raises ResponseParserError: If ``shape`` is a tagged union and
            ``value`` does not have exactly one member set.
        """
        if shape.is_tagged_union:
            if len(value) != 1:
                error_msg = (
                    "Invalid service response: %s must have one and only "
                    "one member set."
                )
                raise ResponseParserError(error_msg % shape.name)
            tag = self._get_first_key(value)
            if tag not in shape.members:
                msg = (
                    "Received a tagged union response with member "
                    "unknown to client: %s. Please upgrade SDK for full "
                    "response support."
                )
                # Let the logging framework do the formatting lazily.
                LOG.info(msg, tag)
                return True
        return False

    def _handle_unknown_tagged_union_member(self, tag):
        """Return the placeholder dict used for an unknown union member."""
        return {'SDK_UNKNOWN_MEMBER': {'name': tag}}
355
356
class BaseXMLResponseParser(ResponseParser):
    """Shared XML body parsing for the XML based protocols."""

    def __init__(self, timestamp_parser=None, blob_parser=None):
        super(BaseXMLResponseParser, self).__init__(timestamp_parser,
                                                    blob_parser)
        # Matches a ``{namespace}`` prefix so that it can be stripped from
        # element tags or replaced in attribute names.
        self._namespace_re = re.compile('{.*}')

    def _handle_map(self, shape, node):
        """Parse a map shape from its entry nodes into a dict.

        Each entry is expected to contain a key element and a value
        element; their tag names come from the key/value shapes'
        serialized names, defaulting to ``key`` and ``value``.

        :raises ResponseParserError: If an entry contains any other tag.
        """
        key_shape = shape.key
        value_shape = shape.value
        key_tag = key_shape.serialization.get('name') or 'key'
        value_tag = value_shape.serialization.get('name') or 'value'
        # A flattened map with a single entry arrives as a bare node.
        if shape.serialization.get('flattened') and not isinstance(node, list):
            entries = [node]
        else:
            entries = node
        result = {}
        # NOTE(review): the key/value locals below are not reset between
        # entries, so an entry lacking one of them reuses the previous
        # entry's value (or fails on the very first entry).
        for entry in entries:
            for pair_node in entry:
                # Within each <entry> there's a <key> and a <value>
                tag_name = self._node_tag(pair_node)
                if tag_name == key_tag:
                    parsed_key = self._parse_shape(key_shape, pair_node)
                elif tag_name == value_tag:
                    parsed_value = self._parse_shape(value_shape, pair_node)
                else:
                    raise ResponseParserError("Unknown tag: %s" % tag_name)
            result[parsed_key] = parsed_value
        return result

    def _node_tag(self, node):
        """Return the tag of ``node`` with any namespace removed."""
        return self._namespace_re.sub('', node.tag)

    def _handle_list(self, shape, node):
        """Parse a list shape, wrapping a lone flattened element first."""
        # _build_name_to_xml_node only aggregates *repeated* elements into
        # a list, so a flattened list with a single element looks exactly
        # like a scalar.  Turn it into a one element list before handing
        # off to the shared list handling.
        is_flattened = shape.serialization.get('flattened')
        if is_flattened and not isinstance(node, list):
            node = [node]
        return super(BaseXMLResponseParser, self)._handle_list(shape, node)

    def _handle_structure(self, shape, node):
        """Parse a structure shape from the children of ``node``.

        Members that carry a ``location`` or are event headers are
        skipped.  A member without a matching child element is read from
        the node's attributes when it is marked ``xmlAttribute``.
        """
        members = shape.members
        if shape.metadata.get('exception', False):
            node = self._get_error_root(node)
        xml_dict = self._build_name_to_xml_node(node)
        if self._has_unknown_tagged_union_member(shape, xml_dict):
            return self._handle_unknown_tagged_union_member(
                self._get_first_key(xml_dict))
        parsed = {}
        for member_name in members:
            member_shape = members[member_name]
            serialization = member_shape.serialization
            if 'location' in serialization or \
               serialization.get('eventheader'):
                # All members with locations have already been handled,
                # so we don't need to parse these members.
                continue
            member_node = xml_dict.get(
                self._member_key_name(member_shape, member_name))
            if member_node is not None:
                parsed[member_name] = self._parse_shape(
                    member_shape, member_node)
                continue
            if not serialization.get('xmlAttribute'):
                continue
            location_name = serialization['name']
            # Swap the ``{namespace}`` part of every attribute name for
            # the prefix used by the member's serialized name.
            prefix = location_name.split(':')[0] + ':'
            attribs = {}
            for attr_name, attr_value in node.attrib.items():
                attribs[self._namespace_re.sub(prefix, attr_name)] = attr_value
            if location_name in attribs:
                parsed[member_name] = attribs[location_name]
        return parsed

    def _get_error_root(self, original_root):
        """Return the ``Error`` child of an ``ErrorResponse`` root.

        Any other root, or an ``ErrorResponse`` without an ``Error``
        child, is returned unchanged.
        """
        if self._node_tag(original_root) != 'ErrorResponse':
            return original_root
        for child in original_root:
            if self._node_tag(child) == 'Error':
                return child
        return original_root

    def _member_key_name(self, shape, member_name):
        """Return the XML element name under which a member is found."""
        # Flattened lists with a serialization name are special: the
        # locationName of the list's member shape is the key name used in
        # the surrounding structure.
        is_flattened_list = (shape.type_name == 'list' and
                             shape.serialization.get('flattened'))
        if is_flattened_list:
            list_member_name = shape.member.serialization.get('name')
            if list_member_name is not None:
                return list_member_name
        serialized_name = shape.serialization.get('name')
        if serialized_name is None:
            return member_name
        return serialized_name

    def _build_name_to_xml_node(self, parent_node):
        """Map each child tag of ``parent_node`` to its node(s).

        A tag that occurs once maps to the node itself; a repeated tag
        maps to a list of the nodes in document order.
        """
        # If the parent node is actually a list. We should not be trying
        # to serialize it to a dictionary. Instead, use the first element
        # in the list.
        if isinstance(parent_node, list):
            return self._build_name_to_xml_node(parent_node[0])
        name_to_node = {}
        for child in parent_node:
            tag = self._node_tag(child)
            if tag not in name_to_node:
                name_to_node[tag] = child
                continue
            # Repeated keys are aggregated into a single list:
            # <foo>1</foo><foo>2</foo> -> {'foo': [Node(1), Node(2)]}
            existing = name_to_node[tag]
            if isinstance(existing, list):
                existing.append(child)
            else:
                # Convert from a scalar to a list.
                name_to_node[tag] = [existing, child]
        return name_to_node

    def _parse_xml_string_to_dom(self, xml_string):
        """Parse ``xml_string`` and return the root element.

        :raises ResponseParserError: If the XML cannot be parsed.
        """
        try:
            parser = ETree.XMLParser(
                target=ETree.TreeBuilder(),
                encoding=self.DEFAULT_ENCODING)
            parser.feed(xml_string)
            return parser.close()
        except XMLParseError as e:
            message = (
                "Unable to parse response (%s), "
                "invalid XML received. Further retries may succeed:\n%s" %
                (e, xml_string))
            raise ResponseParserError(message)

    def _replace_nodes(self, parsed):
        """Recursively replace XML nodes in ``parsed`` with plain values.

        Nodes with children become nested dicts, leaf nodes become their
        text.  ``parsed`` is modified in place and also returned.
        """
        for key in list(parsed):
            value = parsed[key]
            if list(value):
                children = self._build_name_to_xml_node(value)
                parsed[key] = self._replace_nodes(children)
            else:
                parsed[key] = value.text
        return parsed

    @_text_content
    def _handle_boolean(self, shape, text):
        """Return True only for the exact text ``true``."""
        return text == 'true'

    @_text_content
    def _handle_float(self, shape, text):
        """Convert the text to a ``float``."""
        return float(text)

    @_text_content
    def _handle_timestamp(self, shape, text):
        """Convert the text with the configured timestamp parser."""
        return self._timestamp_parser(text)

    @_text_content
    def _handle_integer(self, shape, text):
        """Convert the text to an ``int``."""
        return int(text)

    @_text_content
    def _handle_string(self, shape, text):
        """Return the text unchanged."""
        return text

    @_text_content
    def _handle_blob(self, shape, text):
        """Convert the text with the configured blob parser."""
        return self._blob_parser(text)

    # Types that share a handler with a closely related type.
    _handle_character = _handle_string
    _handle_double = _handle_float
    _handle_long = _handle_integer
527
528
class QueryParser(BaseXMLResponseParser):
    """Parser for ``query`` protocol responses, which have XML bodies."""

    def _do_error_parse(self, response, shape):
        """Convert an XML error body into a plain dict."""
        root = self._parse_xml_string_to_dom(response['body'])
        parsed = self._build_name_to_xml_node(root)
        self._replace_nodes(parsed)
        # Once we've converted xml->dict, we need to make one or two
        # more adjustments to extract nested errors and to be consistent
        # with ResponseMetadata for non-error responses:
        # 1. {"Errors": {"Error": {...}}} -> {"Error": {...}}
        # 2. {"RequestId": "id"} -> {"ResponseMetadata": {"RequestId": "id"}}
        try:
            nested_errors = parsed.pop('Errors')
        except KeyError:
            pass
        else:
            parsed.update(nested_errors)
        if 'RequestId' in parsed:
            request_id = parsed.pop('RequestId')
            parsed['ResponseMetadata'] = {'RequestId': request_id}
        return parsed

    def _do_modeled_error_parse(self, response, shape):
        """Parse the body against ``shape`` without response metadata."""
        return self._parse_body_as_xml(response, shape, inject_metadata=False)

    def _do_parse(self, response, shape):
        """Parse the body against ``shape`` and add response metadata."""
        return self._parse_body_as_xml(response, shape, inject_metadata=True)

    def _parse_body_as_xml(self, response, shape, inject_metadata=True):
        """Parse the XML body according to ``shape``.

        When ``shape`` is ``None`` the result starts out as an empty dict.
        If the shape declares a ``resultWrapper``, parsing starts at that
        child of the root element instead of at the root itself.
        """
        root = self._parse_xml_string_to_dom(response['body'])
        if shape is None:
            parsed = {}
        else:
            if 'resultWrapper' in shape.serialization:
                start = self._find_result_wrapped_shape(
                    shape.serialization['resultWrapper'],
                    root)
            else:
                start = root
            parsed = self._parse_shape(shape, start)
        if inject_metadata:
            self._inject_response_metadata(root, parsed)
        return parsed

    def _find_result_wrapped_shape(self, element_name, xml_root_node):
        """Return the child of the root element named ``element_name``."""
        return self._build_name_to_xml_node(xml_root_node)[element_name]

    def _inject_response_metadata(self, node, inject_into):
        """Copy the ``ResponseMetadata`` element's children as text.

        Nothing is injected when ``node`` has no ``ResponseMetadata``
        child.
        """
        metadata_node = self._build_name_to_xml_node(node).get(
            'ResponseMetadata')
        if metadata_node is None:
            return
        children = self._build_name_to_xml_node(metadata_node)
        inject_into['ResponseMetadata'] = {
            name: child.text for name, child in children.items()
        }
580
581
class EC2QueryParser(QueryParser):
    """Query parser variant for EC2 style request ids and error bodies."""

    def _inject_response_metadata(self, node, inject_into):
        """Inject ``RequestId`` from the ``requestId`` child, if present."""
        request_id_node = self._build_name_to_xml_node(node).get('requestId')
        if request_id_node is None:
            return
        inject_into['ResponseMetadata'] = {'RequestId': request_id_node.text}

    def _do_error_parse(self, response, shape):
        """Parse an error body, moving ``RequestID`` into the metadata."""
        # EC2 errors look like:
        # <Response>
        #   <Errors>
        #     <Error>
        #       <Code>InvalidInstanceID.Malformed</Code>
        #       <Message>Invalid id: "1343124"</Message>
        #     </Error>
        #   </Errors>
        #   <RequestID>12345</RequestID>
        # </Response>
        # This is different from QueryParser in that it's RequestID,
        # not RequestId
        parsed = super(EC2QueryParser, self)._do_error_parse(response, shape)
        if 'RequestID' in parsed:
            request_id = parsed.pop('RequestID')
            parsed['ResponseMetadata'] = {'RequestId': request_id}
        return parsed

    def _get_error_root(self, original_root):
        """Return the first ``Error`` found inside an ``Errors`` child.

        Falls back to ``original_root`` when no such element exists.
        """
        for child in original_root:
            if self._node_tag(child) != 'Errors':
                continue
            for errors_child in child:
                if self._node_tag(errors_child) == 'Error':
                    return errors_child
        return original_root
617
618
class BaseJSONParser(ResponseParser):
    """Shared JSON body parsing for the JSON based protocols."""

    def _handle_structure(self, shape, value):
        """Parse a structure shape from an already decoded JSON value.

        Document types are returned as-is, and a ``None`` value stays
        ``None``.  Otherwise only members present with a non-``None``
        value are included in the result.
        """
        if shape.is_document_type:
            return value
        member_shapes = shape.members
        if value is None:
            # If the comes across the wire as "null" (None in python),
            # we should be returning this unchanged, instead of as an
            # empty dict.
            return None
        if self._has_unknown_tagged_union_member(shape, value):
            return self._handle_unknown_tagged_union_member(
                self._get_first_key(value))
        final_parsed = {}
        for member_name in member_shapes:
            member_shape = member_shapes[member_name]
            json_name = member_shape.serialization.get('name', member_name)
            raw_value = value.get(json_name)
            if raw_value is None:
                continue
            final_parsed[member_name] = self._parse_shape(
                member_shape, raw_value)
        return final_parsed

    def _handle_map(self, shape, value):
        """Parse every key and value of a map with the map's shapes."""
        key_shape = shape.key
        value_shape = shape.value
        parsed = {}
        for raw_key, raw_value in value.items():
            actual_key = self._parse_shape(key_shape, raw_key)
            actual_value = self._parse_shape(value_shape, raw_value)
            parsed[actual_key] = actual_value
        return parsed

    def _handle_blob(self, shape, value):
        """Convert ``value`` with the configured blob parser."""
        return self._blob_parser(value)

    def _handle_timestamp(self, shape, value):
        """Convert ``value`` with the configured timestamp parser."""
        return self._timestamp_parser(value)

    def _do_error_parse(self, response, shape):
        """Build the standard error dict from a JSON error body."""
        body = self._parse_body_as_json(response['body'])
        # Error responses can have slightly different structures for json.
        # The basic structure is:
        #
        # {"__type":"ConnectClientException",
        #  "message":"The error message."}

        # The error message can either come in the 'message' or 'Message' key
        # so we need to check for both.
        message = body.get('message', body.get('Message', ''))
        error = {
            "Error": {"Message": message, "Code": ''},
            "ResponseMetadata": {},
        }
        # if the message did not contain an error code
        # include the response status code
        response_code = response.get('status_code')
        code = body.get('__type', response_code and str(response_code))
        if code is not None:
            # code has a couple forms as well:
            # * "com.aws.dynamodb.vAPI#ProvisionedThroughputExceededException"
            # * "ResourceNotFoundException"
            if '#' in code:
                code = code.rsplit('#', 1)[1]
            error['Error']['Code'] = code
        self._inject_response_metadata(error, response['headers'])
        return error

    def _inject_response_metadata(self, parsed, headers):
        """Copy the ``x-amzn-requestid`` header into ``ResponseMetadata``."""
        if 'x-amzn-requestid' in headers:
            request_id = headers['x-amzn-requestid']
            metadata = parsed.setdefault('ResponseMetadata', {})
            metadata['RequestId'] = request_id

    def _parse_body_as_json(self, body_contents):
        """Decode and load a JSON body.

        An empty body yields an empty dict.  A body that is not valid
        JSON yields ``{'message': <decoded body>}``.
        """
        if not body_contents:
            return {}
        body = body_contents.decode(self.DEFAULT_ENCODING)
        try:
            return json.loads(body)
        except ValueError:
            # if the body cannot be parsed, include
            # the literal string as the message
            return {'message': body}
705
706
class BaseEventStreamParser(ResponseParser):
    """Common parsing logic for a single event stream message.

    Concrete subclasses provide ``_initial_body_parse`` to perform the
    protocol specific (XML/JSON) decoding of the event payload.
    """

    def _do_parse(self, response, shape):
        """Parse one event into a dict of its modeled members.

        When ``shape`` is the event stream itself, the ``:event-type``
        header picks the member shape to parse and the result is nested
        under that event type; an event type with no matching member
        yields an empty dict.  Otherwise ``shape`` is treated as the event
        and its header and payload members are parsed directly.
        """
        parsed = {}
        if not shape.serialization.get('eventstream'):
            self._parse_non_payload_attrs(
                response, shape, shape.members, parsed)
            self._parse_payload(response, shape, shape.members, parsed)
            return parsed
        event_type = response['headers'].get(':event-type')
        event_shape = shape.members.get(event_type)
        if event_shape:
            parsed[event_type] = self._do_parse(response, event_shape)
        return parsed

    def _do_error_parse(self, response, shape):
        """Build the ``{'Error': {...}}`` dict for an error event.

        If the ``:exception-type`` header names a member of ``shape``, the
        body is parsed with that member shape and its ``Message`` (or
        ``message``) value becomes the error message.  Otherwise the code
        and message come from the ``:error-code`` and ``:error-message``
        headers, each defaulting to an empty string.
        """
        headers = response['headers']
        exception_type = headers.get(':exception-type')
        exception_shape = shape.members.get(exception_type)
        if exception_shape is None:
            return {
                'Error': {
                    'Code': headers.get(':error-code', ''),
                    'Message': headers.get(':error-message', ''),
                }
            }
        raw_parse = self._initial_body_parse(response['body'])
        body = self._parse_shape(exception_shape, raw_parse)
        message = body.get('Message', body.get('message', ''))
        return {'Error': {'Code': exception_type, 'Message': message}}

    def _parse_payload(self, response, shape, member_shapes, final_parsed):
        """Fill ``final_parsed`` from the body of an event.

        Nothing happens unless ``shape`` is marked as an event.  The first
        member marked ``eventpayload`` receives the body: untouched for a
        blob, decoded for a string, and parsed against the member shape for
        anything else.  Without such a member the body is parsed against
        ``shape`` itself and merged into ``final_parsed``.
        """
        if not shape.serialization.get('event'):
            return
        for name in member_shapes:
            member_shape = member_shapes[name]
            if not member_shape.serialization.get('eventpayload'):
                continue
            payload = response['body']
            type_name = member_shape.type_name
            if type_name == 'blob':
                value = payload
            elif type_name == 'string':
                value = payload.decode(self.DEFAULT_ENCODING)
            else:
                value = self._parse_shape(
                    member_shape, self._initial_body_parse(payload))
            final_parsed[name] = value
            return
        # No member is flagged as the explicit payload, so the whole body
        # is described by the current shape.
        raw_parse = self._initial_body_parse(response['body'])
        final_parsed.update(self._parse_shape(shape, raw_parse))

    def _parse_non_payload_attrs(self, response, shape,
                                 member_shapes, final_parsed):
        """Copy members bound to event headers into ``final_parsed``.

        Only members marked ``eventheader`` whose name is present in the
        response headers are copied.
        """
        headers = response['headers']
        for name in member_shapes:
            member_shape = member_shapes[name]
            if not member_shape.serialization.get('eventheader'):
                continue
            if name not in headers:
                continue
            value = headers[name]
            if member_shape.type_name == 'timestamp':
                # Event stream timestamps are in milliseconds, so divide
                # by 1000 to convert to seconds before parsing.
                value = self._timestamp_parser(value / 1000.0)
            final_parsed[name] = value

    def _initial_body_parse(self, body_contents):
        """Perform the first round of xml/json parsing of the body.

        The parsed body still has to be walked to convert types; this hook
        only does the initial decoding and must be provided by subclasses.
        """
        raise NotImplementedError("_initial_body_parse")
783
784
class EventStreamJSONParser(BaseEventStreamParser, BaseJSONParser):
    """Event stream parser whose event payloads are parsed as JSON."""

    def _initial_body_parse(self, body_contents):
        """Parse the raw event payload with ``_parse_body_as_json``."""
        json_document = self._parse_body_as_json(body_contents)
        return json_document
789
790
class EventStreamXMLParser(BaseEventStreamParser, BaseXMLResponseParser):
    """Event stream parser whose event payloads are parsed as XML."""

    def _initial_body_parse(self, xml_string):
        """Return the DOM for ``xml_string``.

        An empty payload is not handed to the XML parser; an element with
        an empty tag is returned in its place.
        """
        if xml_string:
            return self._parse_xml_string_to_dom(xml_string)
        return ETree.Element('')
797
798
class JSONParser(BaseJSONParser):
    """Response parser for the "json" protocol."""

    # The docstring above must be the first statement in the class body;
    # placed after this attribute it would not be picked up as ``__doc__``.
    EVENT_STREAM_PARSER_CLS = EventStreamJSONParser

    def _do_parse(self, response, shape):
        """Parse a successful response into a dict.

        :param response: The response dict; its ``body`` and ``headers``
            entries are read.
        :param shape: The output shape, or ``None`` when nothing is
            modeled (in which case only response metadata is added).
        :return: The parsed dict, including ``ResponseMetadata`` when the
            request id header is present.
        """
        parsed = {}
        if shape is not None:
            event_name = shape.event_stream_name
            if event_name:
                parsed = self._handle_event_stream(response, shape, event_name)
            else:
                parsed = self._handle_json_body(response['body'], shape)
        self._inject_response_metadata(parsed, response['headers'])
        return parsed

    def _do_modeled_error_parse(self, response, shape):
        """Parse the error body against the modeled error ``shape``."""
        return self._handle_json_body(response['body'], shape)

    def _handle_event_stream(self, response, shape, event_name):
        """Parse a response whose ``event_name`` member is an event stream.

        The payload of the stream's initial response is parsed as the
        modeled body, and the event stream object itself is stored under
        ``event_name`` in the result.

        :raises ResponseParserError: If the stream reports that it has no
            initial response.
        """
        event_stream_shape = shape.members[event_name]
        event_stream = self._create_event_stream(response, event_stream_shape)
        try:
            event = event_stream.get_initial_response()
        except NoInitialResponseError:
            error_msg = 'First event was not of type initial-response'
            raise ResponseParserError(error_msg)
        parsed = self._handle_json_body(event.payload, shape)
        parsed[event_name] = event_stream
        return parsed

    def _handle_json_body(self, raw_body, shape):
        """Load ``raw_body`` as JSON and convert it according to ``shape``."""
        # The json.loads() gives us the primitive JSON types,
        # but we need to traverse the parsed JSON data to convert
        # to richer types (blobs, timestamps, etc.).
        parsed_json = self._parse_body_as_json(raw_body)
        return self._parse_shape(shape, parsed_json)
836
837
class BaseRestParser(ResponseParser):
    """Shared behavior for the REST style (rest-json / rest-xml) parsers.

    Output members can be bound to the status code, to headers or to the
    body.  Subclasses implement ``_initial_body_parse`` to decode the body
    into the structure ``_parse_shape`` expects.
    """

    def _do_parse(self, response, shape):
        """Parse a successful response.

        :return: A dict holding ``ResponseMetadata`` plus every member
            modeled by ``shape`` that could be parsed from the response.
        """
        final_parsed = {}
        final_parsed['ResponseMetadata'] = self._populate_response_metadata(
            response)
        self._add_modeled_parse(response, shape, final_parsed)
        return final_parsed

    def _add_modeled_parse(self, response, shape, final_parsed):
        """Merge the members modeled by ``shape`` into ``final_parsed``.

        ``final_parsed`` is updated in place and is also returned.  Nothing
        is added when ``shape`` is ``None``.
        """
        if shape is None:
            return final_parsed
        member_shapes = shape.members
        self._parse_non_payload_attrs(response, shape,
                                      member_shapes, final_parsed)
        self._parse_payload(response, shape, member_shapes, final_parsed)
        # Return the dict on this path as well, so the return value does
        # not depend on whether a shape was supplied.
        return final_parsed

    def _do_modeled_error_parse(self, response, shape):
        """Parse an error response against its modeled error ``shape``."""
        final_parsed = {}
        self._add_modeled_parse(response, shape, final_parsed)
        return final_parsed

    def _populate_response_metadata(self, response):
        """Build the ``ResponseMetadata`` dict from the request id headers.

        ``x-amzn-requestid`` takes precedence.  When only
        ``x-amz-request-id`` is present, ``HostId`` is recorded as well
        from ``x-amz-id-2`` (an empty string if that header is missing).
        """
        metadata = {}
        headers = response['headers']
        if 'x-amzn-requestid' in headers:
            metadata['RequestId'] = headers['x-amzn-requestid']
        elif 'x-amz-request-id' in headers:
            metadata['RequestId'] = headers['x-amz-request-id']
            # HostId is what it's called whenever this value is returned
            # in an XML response body, so to be consistent, we'll always
            # call it HostId.
            metadata['HostId'] = headers.get('x-amz-id-2', '')
        return metadata

    def _parse_payload(self, response, shape, member_shapes, final_parsed):
        """Parse the response body into ``final_parsed``.

        If ``shape`` names a payload member, only that member is populated
        from the body; otherwise the body is parsed against ``shape`` and
        merged into ``final_parsed``.
        """
        if 'payload' in shape.serialization:
            # If a payload is specified in the output shape, then only that
            # shape is used for the body payload.
            payload_member_name = shape.serialization['payload']
            body_shape = member_shapes[payload_member_name]
            if body_shape.serialization.get('eventstream'):
                body = self._create_event_stream(response, body_shape)
                final_parsed[payload_member_name] = body
            elif body_shape.type_name in ['string', 'blob']:
                # This is a stream
                body = response['body']
                # NOTE(review): a bytes body is decoded to text for 'blob'
                # payloads as well as 'string' ones -- confirm that blob
                # payloads never reach this point as raw bytes.
                if isinstance(body, bytes):
                    body = body.decode(self.DEFAULT_ENCODING)
                final_parsed[payload_member_name] = body
            else:
                original_parsed = self._initial_body_parse(response['body'])
                final_parsed[payload_member_name] = self._parse_shape(
                    body_shape, original_parsed)
        else:
            original_parsed = self._initial_body_parse(response['body'])
            body_parsed = self._parse_shape(shape, original_parsed)
            final_parsed.update(body_parsed)

    def _parse_non_payload_attrs(self, response, shape,
                                 member_shapes, final_parsed):
        """Populate members bound to the status code or to headers.

        Members without a ``location`` are skipped.  A ``header`` member is
        only set when its header (the serialized ``name``, falling back to
        the member name) is present in the response.
        """
        headers = response['headers']
        for name in member_shapes:
            member_shape = member_shapes[name]
            location = member_shape.serialization.get('location')
            if location is None:
                continue
            elif location == 'statusCode':
                final_parsed[name] = self._parse_shape(
                    member_shape, response['status_code'])
            elif location == 'headers':
                final_parsed[name] = self._parse_header_map(member_shape,
                                                            headers)
            elif location == 'header':
                header_name = member_shape.serialization.get('name', name)
                if header_name in headers:
                    final_parsed[name] = self._parse_shape(
                        member_shape, headers[header_name])

    def _parse_header_map(self, shape, headers):
        """Collect the headers that start with the shape's name prefix.

        :return: A dict mapping each matching header name, with the prefix
            removed, to the header's value.
        """
        # Note that headers are case insensitive, so we .lower()
        # all header names and header prefixes.
        parsed = {}
        prefix = shape.serialization.get('name', '').lower()
        for header_name in headers:
            if header_name.lower().startswith(prefix):
                # The key name inserted into the parsed hash
                # strips off the prefix.
                name = header_name[len(prefix):]
                parsed[name] = headers[header_name]
        return parsed

    def _initial_body_parse(self, body_contents):
        """Perform the first round of xml/json parsing of the body.

        Must be implemented by subclasses.
        """
        # This method should do the initial xml/json parsing of the
        # body.  We still need to walk the parsed body in order
        # to convert types, but this method will do the first round
        # of parsing.
        raise NotImplementedError("_initial_body_parse")

    def _handle_string(self, shape, value):
        """Return ``value``, decoding it first for JSON value headers.

        When ``is_json_value_header(shape)`` is true, ``value`` is base64
        decoded and the resulting text is loaded as JSON; otherwise it is
        returned unchanged.
        """
        parsed = value
        if is_json_value_header(shape):
            decoded = base64.b64decode(value).decode(self.DEFAULT_ENCODING)
            parsed = json.loads(decoded)
        return parsed
943
944
class RestJSONParser(BaseRestParser, BaseJSONParser):
    """Response parser for the "rest-json" protocol."""

    EVENT_STREAM_PARSER_CLS = EventStreamJSONParser

    def _initial_body_parse(self, body_contents):
        """Parse the raw response body as JSON."""
        return self._parse_body_as_json(body_contents)

    def _do_error_parse(self, response, shape):
        """Parse an error response and settle its ``Code``.

        The inherited error parsing runs first; the error code is then
        overridden from the ``x-amzn-errortype`` header or the JSON body
        when either one supplies it.
        """
        error = super(RestJSONParser, self)._do_error_parse(response, shape)
        self._inject_error_code(error, response)
        return error

    def _inject_error_code(self, error, response):
        """Set ``error['Error']['Code']`` from the header or the body.

        The ``x-amzn-errortype`` header wins; otherwise a ``code`` (or
        ``Code``) key in the JSON body is used.  ``error`` is left as is
        when neither source provides a code.  String codes are reduced to
        the bare error name: anything after the first ``:`` and anything
        up to the last ``#`` is dropped.
        """
        headers = response['headers']
        if 'x-amzn-errortype' in headers:
            code = headers['x-amzn-errortype']
        else:
            # The body only needs to be parsed when the header is absent.
            body = self._initial_body_parse(response['body'])
            if 'code' not in body and 'Code' not in body:
                return
            code = body.get('code', body.get('Code', ''))
        if isinstance(code, six.string_types):
            # Could be any of:
            # x-amzn-errortype: ValidationException:
            # x-amzn-errortype: com.example#ValidationException
            # x-amzn-errortype: com.example#ValidationException:http://...
            code = code.split(':', 1)[0].rsplit('#', 1)[-1]
        error['Error']['Code'] = code
970
971
class RestXMLParser(BaseRestParser, BaseXMLResponseParser):
    """Response parser for the "rest-xml" protocol."""

    EVENT_STREAM_PARSER_CLS = EventStreamXMLParser

    def _initial_body_parse(self, xml_string):
        """Return the DOM for ``xml_string``.

        An empty body is not handed to the XML parser; an element with an
        empty tag is returned in its place.
        """
        if xml_string:
            return self._parse_xml_string_to_dom(xml_string)
        return ETree.Element('')

    def _do_error_parse(self, response, shape):
        """Parse an error response, preferring the body over the status.

        The XML body is used when it is present and parseable; otherwise
        the error is derived from the HTTP status code.
        """
        # We're trying to be service agnostic here, but S3 does have a
        # slightly different response structure for its errors compared to
        # other rest-xml services (route53/cloudfront).  We handle this by
        # just trying to parse both forms.
        # First:
        # <ErrorResponse xmlns="...">
        #   <Error>
        #     <Type>Sender</Type>
        #     <Code>InvalidInput</Code>
        #     <Message>Invalid resource type: foo</Message>
        #   </Error>
        #   <RequestId>request-id</RequestId>
        # </ErrorResponse>
        if not response['body']:
            return self._parse_error_from_http_status(response)
        # Invalid XML in the body must not blow up the parser.  It should
        # at least try to pull information about the error response from
        # other sources like the HTTP status code.
        try:
            return self._parse_error_from_body(response)
        except ResponseParserError:
            LOG.debug(
                'Exception caught when parsing error response body:',
                exc_info=True)
        return self._parse_error_from_http_status(response)

    def _parse_error_from_http_status(self, response):
        """Build an error dict from the status code and request headers.

        The code is the status code as a string and the message is its
        standard HTTP reason phrase (empty when the code is unknown).
        """
        status_code = response['status_code']
        headers = response['headers']
        reason = six.moves.http_client.responses.get(status_code, '')
        return {
            'Error': {
                'Code': str(status_code),
                'Message': reason,
            },
            'ResponseMetadata': {
                'RequestId': headers.get('x-amz-request-id', ''),
                'HostId': headers.get('x-amz-id-2', ''),
            }
        }

    def _parse_error_from_body(self, response):
        """Build an error dict from the XML body of ``response``."""
        root = self._parse_xml_string_to_dom(response['body'])
        parsed = self._build_name_to_xml_node(root)
        self._replace_nodes(parsed)
        if root.tag == 'Error':
            # This is an S3 error response.  The RequestId and the HostId
            # already live in the ResponseMetadata built from the headers
            # and are merely duplicated in the XML body, so they are
            # dropped from the parsed body instead of being kept twice.
            metadata = self._populate_response_metadata(response)
            for duplicated in ('RequestId', 'HostId'):
                parsed.pop(duplicated, '')
            return {'Error': parsed, 'ResponseMetadata': metadata}
        if 'RequestId' in parsed:
            # Other rest-xml services:
            request_id = parsed.pop('RequestId')
            parsed['ResponseMetadata'] = {'RequestId': request_id}
        # Guarantee that Message and Code exist even if the body lacks them.
        merged = {'Error': {'Message': '', 'Code': ''}}
        merge_dicts(merged, parsed)
        return merged

    @_text_content
    def _handle_string(self, shape, text):
        """Delegate string handling to the parent implementation."""
        return super(RestXMLParser, self)._handle_string(shape, text)
1047
1048
# Registry of the response parser classes, keyed by the name of the
# protocol each one handles.
PROTOCOL_PARSERS = {
    'ec2': EC2QueryParser,
    'query': QueryParser,
    'json': JSONParser,
    'rest-json': RestJSONParser,
    'rest-xml': RestXMLParser,
}
1056