# Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""Response parsers for the various protocol types.

The module contains classes that can take an HTTP response, and given
an output shape, parse the response into a dict according to the
rules in the output shape.

There are many similarities amongst the different protocols with regard
to response parsing, and the code is structured in a way to avoid
code duplication when possible.  The diagram below is a diagram
showing the inheritance hierarchy of the response classes.

::



                                 +--------------+
                                 |ResponseParser|
                                 +--------------+
                                    ^    ^    ^
               +--------------------+    |    +-------------------+
               |                         |                        |
    +----------+----------+       +------+-------+        +-------+------+
    |BaseXMLResponseParser|       |BaseRestParser|        |BaseJSONParser|
    +---------------------+       +--------------+        +--------------+
              ^         ^          ^           ^           ^        ^
              |         |          |           |           |        |
              |         |          |           |           |        |
              |        ++----------+-+       +-+-----------++       |
              |        |RestXMLParser|       |RestJSONParser|       |
        +-----+-----+  +-------------+       +--------------+  +----+-----+
        |QueryParser|                                          |JSONParser|
        +-----------+                                          +----------+


The diagram above shows that there is a base class, ``ResponseParser`` that
contains logic that is similar amongst all the different protocols (``query``,
``json``, ``rest-json``, ``rest-xml``).  Amongst the various services there
is shared logic that can be grouped several ways:

* The ``query`` and ``rest-xml`` both have XML bodies that are parsed in the
  same way.
* The ``json`` and ``rest-json`` protocols both have JSON bodies that are
  parsed in the same way.
* The ``rest-json`` and ``rest-xml`` protocols have additional attributes
  besides body parameters that are parsed the same (headers, query string,
  status code).

This is reflected in the class diagram above.  The ``BaseXMLResponseParser``
and the BaseJSONParser contain logic for parsing the XML/JSON body,
and the BaseRestParser contains logic for parsing out attributes that
come from other parts of the HTTP response.  Classes like the
``RestXMLParser`` inherit from the ``BaseXMLResponseParser`` to get the
XML body parsing logic and the ``BaseRestParser`` to get the HTTP
header/status code/query string parsing.

Additionally, there are event stream parsers that are used by the other parsers
to wrap streaming bodies that represent a stream of events. The
BaseEventStreamParser extends from ResponseParser and defines the logic for
parsing values from the headers and payload of a message from the underlying
binary encoding protocol. Currently, event streams support parsing bodies
encoded as JSON and XML through the following hierarchy.


                                 +--------------+
                                 |ResponseParser|
                                 +--------------+
                                    ^    ^    ^
               +--------------------+    |    +------------------+
               |                         |                       |
    +----------+----------+   +----------+----------+    +-------+------+
    |BaseXMLResponseParser|   |BaseEventStreamParser|    |BaseJSONParser|
    +---------------------+   +---------------------+    +--------------+
                     ^                ^        ^                 ^
                     |                |        |                 |
                     |                |        |                 |
                   +-+----------------+-+    +-+-----------------+-+
                   |EventStreamXMLParser|    |EventStreamJSONParser|
                   +--------------------+    +---------------------+

Return Values
=============

Each call to ``parse()`` returns a dict that has this form::

    Standard Response

    {
      "ResponseMetadata": {"RequestId": <requestid>}
      <response keys>
    }

    Error response

    {
      "ResponseMetadata": {"RequestId": <requestid>}
      "Error": {
        "Code": <string>,
        "Message": <string>,
        "Type": <string>,
        <additional keys>
      }
    }

"""
import re
import base64
import json
import logging

from botocore.compat import six, ETree, XMLParseError
from botocore.eventstream import EventStream, NoInitialResponseError

from botocore.utils import parse_timestamp, merge_dicts, \
    is_json_value_header, lowercase_dict

LOG = logging.getLogger(__name__)

DEFAULT_TIMESTAMP_PARSER = parse_timestamp


class ResponseParserFactory(object):
    """Create protocol parsers that share a set of default arguments."""

    def __init__(self):
        self._defaults = {}

    def set_parser_defaults(self, **kwargs):
        """Set default arguments when a parser instance is created.

        You can specify any kwargs that are allowed by a ResponseParser
        class.  There are currently two arguments:

            * timestamp_parser - A callable that can parse a timestamp string
            * blob_parser - A callable that can parse a blob type

        """
        self._defaults.update(kwargs)

    def create_parser(self, protocol_name):
        """Return a parser instance for ``protocol_name``.

        The parser class is looked up in ``PROTOCOL_PARSERS`` and is
        instantiated with the defaults set via ``set_parser_defaults``.
        An unknown protocol name raises ``KeyError`` from that lookup.
        """
        parser_cls = PROTOCOL_PARSERS[protocol_name]
        return parser_cls(**self._defaults)


def create_parser(protocol):
    """Return a parser for ``protocol`` built with no custom defaults."""
    return ResponseParserFactory().create_parser(protocol)


def _text_content(func):
    # This decorator hides the difference between
    # an XML node with text or a plain string.  It's used
    # to ensure that scalar processing operates only on text
    # strings, which allows the same scalar handlers to be used
    # for XML nodes from the body and HTTP headers.
    def _get_text_content(self, shape, node_or_string):
        if hasattr(node_or_string, 'text'):
            text = node_or_string.text
            if text is None:
                # If an XML node is empty <foo></foo>,
                # we want to parse that as an empty string,
                # not as a null/None value.
                text = ''
        else:
            text = node_or_string
        return func(self, shape, text)
    return _get_text_content


class ResponseParserError(Exception):
    """Raised when a response cannot be parsed."""


class ResponseParser(object):
    """Base class for response parsing.

    This class represents the interface that all ResponseParsers for the
    various protocols must implement.

    This class will take an HTTP response and a model shape and parse the
    HTTP response into a dictionary.

    There is a single public method exposed: ``parse``.  See the ``parse``
    docstring for more info.

    """
    DEFAULT_ENCODING = 'utf-8'
    EVENT_STREAM_PARSER_CLS = None

    def __init__(self, timestamp_parser=None, blob_parser=None):
        if timestamp_parser is None:
            timestamp_parser = DEFAULT_TIMESTAMP_PARSER
        self._timestamp_parser = timestamp_parser
        if blob_parser is None:
            blob_parser = self._default_blob_parser
        self._blob_parser = blob_parser
        self._event_stream_parser = None
        if self.EVENT_STREAM_PARSER_CLS is not None:
            # The event stream parser is given the same (already
            # defaulted) timestamp and blob parsers as this parser.
            self._event_stream_parser = self.EVENT_STREAM_PARSER_CLS(
                timestamp_parser, blob_parser)

    def _default_blob_parser(self, value):
        # Blobs are always returned as bytes type (this matters on python3).
        # We don't decode this to a str because it's entirely possible that the
        # blob contains binary data that actually can't be decoded.
        return base64.b64decode(value)

    def parse(self, response, shape):
        """Parse the HTTP response given a shape.

        :param response: The HTTP response dictionary.  This is a dictionary
            that represents the HTTP response.  The dictionary must have the
            following keys, ``body``, ``headers``, and ``status_code``.

        :param shape: The model shape describing the expected output.
        :return: Returns a dictionary representing the parsed response
            described by the model.  In addition to the shape described from
            the model, each response will also have a ``ResponseMetadata``
            which contains metadata about the response, which contains at least
            two keys containing ``RequestId`` and ``HTTPStatusCode``.  Some
            responses may populate additional keys, but ``RequestId`` will
            always be present.

        """
        LOG.debug('Response headers: %s', response['headers'])
        LOG.debug('Response body:\n%s', response['body'])
        if response['status_code'] >= 301:
            if self._is_generic_error_response(response):
                parsed = self._do_generic_error_parse(response)
            elif self._is_modeled_error_shape(shape):
                parsed = self._do_modeled_error_parse(response, shape)
                # We don't want to decorate the modeled fields with metadata
                return parsed
            else:
                parsed = self._do_error_parse(response, shape)
        else:
            parsed = self._do_parse(response, shape)

        # We don't want to decorate event stream responses with metadata
        if shape and shape.serialization.get('eventstream'):
            return parsed

        # Add ResponseMetadata if it doesn't exist and inject the HTTP
        # status code and headers from the response.
        if isinstance(parsed, dict):
            response_metadata = parsed.get('ResponseMetadata', {})
            response_metadata['HTTPStatusCode'] = response['status_code']
            # Ensure that the http header keys are all lower cased. Older
            # versions of urllib3 (< 1.11) would unintentionally do this for us
            # (see urllib3#633). We need to do this conversion manually now.
            headers = response['headers']
            response_metadata['HTTPHeaders'] = lowercase_dict(headers)
            parsed['ResponseMetadata'] = response_metadata
        return parsed

    def _is_modeled_error_shape(self, shape):
        return shape is not None and shape.metadata.get('exception', False)

    def _is_generic_error_response(self, response):
        # There are times when a service will respond with a generic
        # error response such as:
        # '<html><body><b>Http/1.1 Service Unavailable</b></body></html>'
        #
        # This can also happen if you're going through a proxy.
        # In this case the protocol specific _do_error_parse will either
        # fail to parse the response (in the best case) or silently succeed
        # and treat the HTML above as an XML response and return
        # nonsensical parsed data.
        # To prevent this case from happening we first need to check
        # whether or not this response looks like the generic response.
        if response['status_code'] >= 500:
            if 'body' not in response or response['body'] is None:
                return True

            body = response['body'].strip()
            return body.startswith(b'<html>') or not body
        # Anything below 500 is never treated as a generic error.  Return
        # an explicit boolean rather than implicitly falling through to None.
        return False

    def _do_generic_error_parse(self, response):
        # There's not really much we can do when we get a generic
        # html response.
        LOG.debug("Received a non protocol specific error response from the "
                  "service, unable to populate error code and message.")
        return {
            'Error': {'Code': str(response['status_code']),
                      'Message': six.moves.http_client.responses.get(
                          response['status_code'], '')},
            'ResponseMetadata': {},
        }

    def _do_parse(self, response, shape):
        raise NotImplementedError("%s._do_parse" % self.__class__.__name__)

    def _do_error_parse(self, response, shape):
        raise NotImplementedError(
            "%s._do_error_parse" % self.__class__.__name__)

    def _do_modeled_error_parse(self, response, shape, parsed=None):
        # ``parse`` calls this hook with ``(response, shape)`` only, so
        # ``parsed`` must be optional.  Otherwise a subclass that forgets
        # to override the hook fails with a TypeError instead of the
        # intended NotImplementedError.
        raise NotImplementedError(
            "%s._do_modeled_error_parse" % self.__class__.__name__)

    def _parse_shape(self, shape, node):
        # Dispatch on the shape's type name, e.g. ``_handle_structure``;
        # types without a dedicated handler are passed through unchanged.
        handler = getattr(self, '_handle_%s' % shape.type_name,
                          self._default_handle)
        return handler(shape, node)

    def _handle_list(self, shape, node):
        # Enough implementations share list serialization that it's moved
        # up here in the base class.
        parsed = []
        member_shape = shape.member
        for item in node:
            parsed.append(self._parse_shape(member_shape, item))
        return parsed

    def _default_handle(self, shape, value):
        return value

    def _create_event_stream(self, response, shape):
        parser = self._event_stream_parser
        name = response['context'].get('operation_name')
        return EventStream(response['body'], shape, parser, name)

    def _get_first_key(self, value):
        return list(value)[0]

    def _has_unknown_tagged_union_member(self, shape, value):
        # Returns True only when ``shape`` is a tagged union whose single
        # set member is not known to this client.  Raises
        # ResponseParserError if the union does not have exactly one
        # member set.
        if shape.is_tagged_union:
            if len(value) != 1:
                error_msg = (
                    "Invalid service response: %s must have one and only "
                    "one member set."
                )
                raise ResponseParserError(error_msg % shape.name)
            tag = self._get_first_key(value)
            if tag not in shape.members:
                msg = (
                    "Received a tagged union response with member "
                    "unknown to client: %s. Please upgrade SDK for full "
                    "response support."
                )
                # Let the logging module interpolate lazily.
                LOG.info(msg, tag)
                return True
        return False

    def _handle_unknown_tagged_union_member(self, tag):
        return {'SDK_UNKNOWN_MEMBER': {'name': tag}}


class BaseXMLResponseParser(ResponseParser):
    """Shared XML body parsing logic for the XML based protocols."""

    def __init__(self, timestamp_parser=None, blob_parser=None):
        super(BaseXMLResponseParser, self).__init__(timestamp_parser,
                                                    blob_parser)
        # Matches the ``{namespace}`` prefix ElementTree puts on tag names.
        self._namespace_re = re.compile('{.*}')

    def _handle_map(self, shape, node):
        parsed = {}
        key_shape = shape.key
        value_shape = shape.value
        key_location_name = key_shape.serialization.get('name') or 'key'
        value_location_name = value_shape.serialization.get('name') or 'value'
        if shape.serialization.get('flattened') and not isinstance(node, list):
            node = [node]
        for keyval_node in node:
            for single_pair in keyval_node:
                # Within each <entry> there's a <key> and a <value>
                tag_name = self._node_tag(single_pair)
                if tag_name == key_location_name:
                    key_name = self._parse_shape(key_shape, single_pair)
                elif tag_name == value_location_name:
                    val_name = self._parse_shape(value_shape, single_pair)
                else:
                    raise ResponseParserError("Unknown tag: %s" % tag_name)
            parsed[key_name] = val_name
        return parsed

    def _node_tag(self, node):
        return self._namespace_re.sub('', node.tag)

    def _handle_list(self, shape, node):
        # When we use _build_name_to_xml_node, repeated elements are aggregated
        # into a list.  However, we can't tell the difference between a scalar
        # value and a single element flattened list.  So before calling the
        # real _handle_list, we know that "node" should actually be a list if
        # it's flattened, and if it's not, then we make it a one element list.
        if shape.serialization.get('flattened') and not isinstance(node, list):
            node = [node]
        return super(BaseXMLResponseParser, self)._handle_list(shape, node)

    def _handle_structure(self, shape, node):
        parsed = {}
        members = shape.members
        if shape.metadata.get('exception', False):
            node = self._get_error_root(node)
        xml_dict = self._build_name_to_xml_node(node)
        if self._has_unknown_tagged_union_member(shape, xml_dict):
            tag = self._get_first_key(xml_dict)
            return self._handle_unknown_tagged_union_member(tag)
        for member_name in members:
            member_shape = members[member_name]
            if 'location' in member_shape.serialization or \
                    member_shape.serialization.get('eventheader'):
                # All members with locations have already been handled,
                # so we don't need to parse these members.
                continue
            xml_name = self._member_key_name(member_shape, member_name)
            member_node = xml_dict.get(xml_name)
            if member_node is not None:
                parsed[member_name] = self._parse_shape(
                    member_shape, member_node)
            elif member_shape.serialization.get('xmlAttribute'):
                # Rewrite each attribute's ``{namespace}`` prefix to the
                # prefix used in the modeled name so the two can be compared.
                attribs = {}
                location_name = member_shape.serialization['name']
                for key, value in node.attrib.items():
                    new_key = self._namespace_re.sub(
                        location_name.split(':')[0] + ':', key)
                    attribs[new_key] = value
                if location_name in attribs:
                    parsed[member_name] = attribs[location_name]
        return parsed

    def _get_error_root(self, original_root):
        if self._node_tag(original_root) == 'ErrorResponse':
            for child in original_root:
                if self._node_tag(child) == 'Error':
                    return child
        return original_root

    def _member_key_name(self, shape, member_name):
        # This method is needed because we have to special case flattened list
        # with a serialization name.  If this is the case we use the
        # locationName from the list's member shape as the key name for the
        # surrounding structure.
        if shape.type_name == 'list' and shape.serialization.get('flattened'):
            list_member_serialized_name = shape.member.serialization.get(
                'name')
            if list_member_serialized_name is not None:
                return list_member_serialized_name
        serialized_name = shape.serialization.get('name')
        if serialized_name is not None:
            return serialized_name
        return member_name

    def _build_name_to_xml_node(self, parent_node):
        # If the parent node is actually a list, we should not be trying
        # to serialize it to a dictionary.  Instead, return the first element
        # in the list.
        if isinstance(parent_node, list):
            return self._build_name_to_xml_node(parent_node[0])
        xml_dict = {}
        for item in parent_node:
            key = self._node_tag(item)
            if key in xml_dict:
                # If the key already exists, the most natural
                # way to handle this is to aggregate repeated
                # keys into a single list.
                # <foo>1</foo><foo>2</foo> -> {'foo': [Node(1), Node(2)]}
                if isinstance(xml_dict[key], list):
                    xml_dict[key].append(item)
                else:
                    # Convert from a scalar to a list.
                    xml_dict[key] = [xml_dict[key], item]
            else:
                xml_dict[key] = item
        return xml_dict

    def _parse_xml_string_to_dom(self, xml_string):
        try:
            parser = ETree.XMLParser(
                target=ETree.TreeBuilder(),
                encoding=self.DEFAULT_ENCODING)
            parser.feed(xml_string)
            root = parser.close()
        except XMLParseError as e:
            raise ResponseParserError(
                "Unable to parse response (%s), "
                "invalid XML received. Further retries may succeed:\n%s" %
                (e, xml_string))
        return root

    def _replace_nodes(self, parsed):
        # Recursively replace XML nodes with plain dicts/text, in place.
        # Only values of existing keys are reassigned, so mutating
        # ``parsed`` while iterating over it is safe here.
        for key, value in parsed.items():
            if list(value):
                sub_dict = self._build_name_to_xml_node(value)
                parsed[key] = self._replace_nodes(sub_dict)
            else:
                parsed[key] = value.text
        return parsed

    @_text_content
    def _handle_boolean(self, shape, text):
        # Only the exact string 'true' is treated as True.
        return text == 'true'

    @_text_content
    def _handle_float(self, shape, text):
        return float(text)

    @_text_content
    def _handle_timestamp(self, shape, text):
        return self._timestamp_parser(text)

    @_text_content
    def _handle_integer(self, shape, text):
        return int(text)

    @_text_content
    def _handle_string(self, shape, text):
        return text

    @_text_content
    def _handle_blob(self, shape, text):
        return self._blob_parser(text)

    _handle_character = _handle_string
    _handle_double = _handle_float
    _handle_long = _handle_integer


529class QueryParser(BaseXMLResponseParser): 530 531 def _do_error_parse(self, response, shape): 532 xml_contents = response['body'] 533 root = self._parse_xml_string_to_dom(xml_contents) 534 parsed = self._build_name_to_xml_node(root) 535 self._replace_nodes(parsed) 536 # Once we've converted xml->dict, we need to make one or two 537 # more adjustments to extract nested errors and to be consistent 538 # with ResponseMetadata for non-error responses: 539 # 1. {"Errors": {"Error": {...}}} -> {"Error": {...}} 540 # 2. {"RequestId": "id"} -> {"ResponseMetadata": {"RequestId": "id"}} 541 if 'Errors' in parsed: 542 parsed.update(parsed.pop('Errors')) 543 if 'RequestId' in parsed: 544 parsed['ResponseMetadata'] = {'RequestId': parsed.pop('RequestId')} 545 return parsed 546 547 def _do_modeled_error_parse(self, response, shape): 548 return self._parse_body_as_xml(response, shape, inject_metadata=False) 549 550 def _do_parse(self, response, shape): 551 return self._parse_body_as_xml(response, shape, inject_metadata=True) 552 553 def _parse_body_as_xml(self, response, shape, inject_metadata=True): 554 xml_contents = response['body'] 555 root = self._parse_xml_string_to_dom(xml_contents) 556 parsed = {} 557 if shape is not None: 558 start = root 559 if 'resultWrapper' in shape.serialization: 560 start = self._find_result_wrapped_shape( 561 shape.serialization['resultWrapper'], 562 root) 563 parsed = self._parse_shape(shape, start) 564 if inject_metadata: 565 self._inject_response_metadata(root, parsed) 566 return parsed 567 568 def _find_result_wrapped_shape(self, element_name, xml_root_node): 569 mapping = self._build_name_to_xml_node(xml_root_node) 570 return mapping[element_name] 571 572 def _inject_response_metadata(self, node, inject_into): 573 mapping = self._build_name_to_xml_node(node) 574 child_node = mapping.get('ResponseMetadata') 575 if child_node is not None: 576 sub_mapping = self._build_name_to_xml_node(child_node) 577 for key, value in sub_mapping.items(): 
578 sub_mapping[key] = value.text 579 inject_into['ResponseMetadata'] = sub_mapping 580 581 582class EC2QueryParser(QueryParser): 583 584 def _inject_response_metadata(self, node, inject_into): 585 mapping = self._build_name_to_xml_node(node) 586 child_node = mapping.get('requestId') 587 if child_node is not None: 588 inject_into['ResponseMetadata'] = {'RequestId': child_node.text} 589 590 def _do_error_parse(self, response, shape): 591 # EC2 errors look like: 592 # <Response> 593 # <Errors> 594 # <Error> 595 # <Code>InvalidInstanceID.Malformed</Code> 596 # <Message>Invalid id: "1343124"</Message> 597 # </Error> 598 # </Errors> 599 # <RequestID>12345</RequestID> 600 # </Response> 601 # This is different from QueryParser in that it's RequestID, 602 # not RequestId 603 original = super(EC2QueryParser, self)._do_error_parse(response, shape) 604 if 'RequestID' in original: 605 original['ResponseMetadata'] = { 606 'RequestId': original.pop('RequestID') 607 } 608 return original 609 610 def _get_error_root(self, original_root): 611 for child in original_root: 612 if self._node_tag(child) == 'Errors': 613 for errors_child in child: 614 if self._node_tag(errors_child) == 'Error': 615 return errors_child 616 return original_root 617 618 619class BaseJSONParser(ResponseParser): 620 621 def _handle_structure(self, shape, value): 622 final_parsed = {} 623 if shape.is_document_type: 624 final_parsed = value 625 else: 626 member_shapes = shape.members 627 if value is None: 628 # If the comes across the wire as "null" (None in python), 629 # we should be returning this unchanged, instead of as an 630 # empty dict. 
631 return None 632 final_parsed = {} 633 if self._has_unknown_tagged_union_member(shape, value): 634 tag = self._get_first_key(value) 635 return self._handle_unknown_tagged_union_member(tag) 636 for member_name in member_shapes: 637 member_shape = member_shapes[member_name] 638 json_name = member_shape.serialization.get('name', member_name) 639 raw_value = value.get(json_name) 640 if raw_value is not None: 641 final_parsed[member_name] = self._parse_shape( 642 member_shapes[member_name], 643 raw_value) 644 return final_parsed 645 646 def _handle_map(self, shape, value): 647 parsed = {} 648 key_shape = shape.key 649 value_shape = shape.value 650 for key, value in value.items(): 651 actual_key = self._parse_shape(key_shape, key) 652 actual_value = self._parse_shape(value_shape, value) 653 parsed[actual_key] = actual_value 654 return parsed 655 656 def _handle_blob(self, shape, value): 657 return self._blob_parser(value) 658 659 def _handle_timestamp(self, shape, value): 660 return self._timestamp_parser(value) 661 662 def _do_error_parse(self, response, shape): 663 body = self._parse_body_as_json(response['body']) 664 error = {"Error": {"Message": '', "Code": ''}, "ResponseMetadata": {}} 665 # Error responses can have slightly different structures for json. 666 # The basic structure is: 667 # 668 # {"__type":"ConnectClientException", 669 # "message":"The error message."} 670 671 # The error message can either come in the 'message' or 'Message' key 672 # so we need to check for both. 
673 error['Error']['Message'] = body.get('message', 674 body.get('Message', '')) 675 # if the message did not contain an error code 676 # include the response status code 677 response_code = response.get('status_code') 678 code = body.get('__type', response_code and str(response_code)) 679 if code is not None: 680 # code has a couple forms as well: 681 # * "com.aws.dynamodb.vAPI#ProvisionedThroughputExceededException" 682 # * "ResourceNotFoundException" 683 if '#' in code: 684 code = code.rsplit('#', 1)[1] 685 error['Error']['Code'] = code 686 self._inject_response_metadata(error, response['headers']) 687 return error 688 689 def _inject_response_metadata(self, parsed, headers): 690 if 'x-amzn-requestid' in headers: 691 parsed.setdefault('ResponseMetadata', {})['RequestId'] = ( 692 headers['x-amzn-requestid']) 693 694 def _parse_body_as_json(self, body_contents): 695 if not body_contents: 696 return {} 697 body = body_contents.decode(self.DEFAULT_ENCODING) 698 try: 699 original_parsed = json.loads(body) 700 return original_parsed 701 except ValueError: 702 # if the body cannot be parsed, include 703 # the literal string as the message 704 return {'message': body} 705 706 707class BaseEventStreamParser(ResponseParser): 708 709 def _do_parse(self, response, shape): 710 final_parsed = {} 711 if shape.serialization.get('eventstream'): 712 event_type = response['headers'].get(':event-type') 713 event_shape = shape.members.get(event_type) 714 if event_shape: 715 final_parsed[event_type] = self._do_parse(response, event_shape) 716 else: 717 self._parse_non_payload_attrs(response, shape, 718 shape.members, final_parsed) 719 self._parse_payload(response, shape, shape.members, final_parsed) 720 return final_parsed 721 722 def _do_error_parse(self, response, shape): 723 exception_type = response['headers'].get(':exception-type') 724 exception_shape = shape.members.get(exception_type) 725 if exception_shape is not None: 726 original_parsed = 
self._initial_body_parse(response['body']) 727 body = self._parse_shape(exception_shape, original_parsed) 728 error = { 729 'Error': { 730 'Code': exception_type, 731 'Message': body.get('Message', body.get('message', '')) 732 } 733 } 734 else: 735 error = { 736 'Error': { 737 'Code': response['headers'].get(':error-code', ''), 738 'Message': response['headers'].get(':error-message', ''), 739 } 740 } 741 return error 742 743 def _parse_payload(self, response, shape, member_shapes, final_parsed): 744 if shape.serialization.get('event'): 745 for name in member_shapes: 746 member_shape = member_shapes[name] 747 if member_shape.serialization.get('eventpayload'): 748 body = response['body'] 749 if member_shape.type_name == 'blob': 750 parsed_body = body 751 elif member_shape.type_name == 'string': 752 parsed_body = body.decode(self.DEFAULT_ENCODING) 753 else: 754 raw_parse = self._initial_body_parse(body) 755 parsed_body = self._parse_shape(member_shape, raw_parse) 756 final_parsed[name] = parsed_body 757 return 758 # If we didn't find an explicit payload, use the current shape 759 original_parsed = self._initial_body_parse(response['body']) 760 body_parsed = self._parse_shape(shape, original_parsed) 761 final_parsed.update(body_parsed) 762 763 def _parse_non_payload_attrs(self, response, shape, 764 member_shapes, final_parsed): 765 headers = response['headers'] 766 for name in member_shapes: 767 member_shape = member_shapes[name] 768 if member_shape.serialization.get('eventheader'): 769 if name in headers: 770 value = headers[name] 771 if member_shape.type_name == 'timestamp': 772 # Event stream timestamps are an in milleseconds so we 773 # divide by 1000 to convert to seconds. 774 value = self._timestamp_parser(value / 1000.0) 775 final_parsed[name] = value 776 777 def _initial_body_parse(self, body_contents): 778 # This method should do the initial xml/json parsing of the 779 # body. 
We we still need to walk the parsed body in order 780 # to convert types, but this method will do the first round 781 # of parsing. 782 raise NotImplementedError("_initial_body_parse") 783 784 785class EventStreamJSONParser(BaseEventStreamParser, BaseJSONParser): 786 787 def _initial_body_parse(self, body_contents): 788 return self._parse_body_as_json(body_contents) 789 790 791class EventStreamXMLParser(BaseEventStreamParser, BaseXMLResponseParser): 792 793 def _initial_body_parse(self, xml_string): 794 if not xml_string: 795 return ETree.Element('') 796 return self._parse_xml_string_to_dom(xml_string) 797 798 799class JSONParser(BaseJSONParser): 800 801 EVENT_STREAM_PARSER_CLS = EventStreamJSONParser 802 803 """Response parser for the "json" protocol.""" 804 def _do_parse(self, response, shape): 805 parsed = {} 806 if shape is not None: 807 event_name = shape.event_stream_name 808 if event_name: 809 parsed = self._handle_event_stream(response, shape, event_name) 810 else: 811 parsed = self._handle_json_body(response['body'], shape) 812 self._inject_response_metadata(parsed, response['headers']) 813 return parsed 814 815 def _do_modeled_error_parse(self, response, shape): 816 return self._handle_json_body(response['body'], shape) 817 818 def _handle_event_stream(self, response, shape, event_name): 819 event_stream_shape = shape.members[event_name] 820 event_stream = self._create_event_stream(response, event_stream_shape) 821 try: 822 event = event_stream.get_initial_response() 823 except NoInitialResponseError: 824 error_msg = 'First event was not of type initial-response' 825 raise ResponseParserError(error_msg) 826 parsed = self._handle_json_body(event.payload, shape) 827 parsed[event_name] = event_stream 828 return parsed 829 830 def _handle_json_body(self, raw_body, shape): 831 # The json.loads() gives us the primitive JSON types, 832 # but we need to traverse the parsed JSON data to convert 833 # to richer types (blobs, timestamps, etc. 
834 parsed_json = self._parse_body_as_json(raw_body) 835 return self._parse_shape(shape, parsed_json) 836 837 838class BaseRestParser(ResponseParser): 839 840 def _do_parse(self, response, shape): 841 final_parsed = {} 842 final_parsed['ResponseMetadata'] = self._populate_response_metadata( 843 response) 844 self._add_modeled_parse(response, shape, final_parsed) 845 return final_parsed 846 847 def _add_modeled_parse(self, response, shape, final_parsed): 848 if shape is None: 849 return final_parsed 850 member_shapes = shape.members 851 self._parse_non_payload_attrs(response, shape, 852 member_shapes, final_parsed) 853 self._parse_payload(response, shape, member_shapes, final_parsed) 854 855 def _do_modeled_error_parse(self, response, shape): 856 final_parsed = {} 857 self._add_modeled_parse(response, shape, final_parsed) 858 return final_parsed 859 860 def _populate_response_metadata(self, response): 861 metadata = {} 862 headers = response['headers'] 863 if 'x-amzn-requestid' in headers: 864 metadata['RequestId'] = headers['x-amzn-requestid'] 865 elif 'x-amz-request-id' in headers: 866 metadata['RequestId'] = headers['x-amz-request-id'] 867 # HostId is what it's called whenever this value is returned 868 # in an XML response body, so to be consistent, we'll always 869 # call is HostId. 870 metadata['HostId'] = headers.get('x-amz-id-2', '') 871 return metadata 872 873 def _parse_payload(self, response, shape, member_shapes, final_parsed): 874 if 'payload' in shape.serialization: 875 # If a payload is specified in the output shape, then only that 876 # shape is used for the body payload. 
877 payload_member_name = shape.serialization['payload'] 878 body_shape = member_shapes[payload_member_name] 879 if body_shape.serialization.get('eventstream'): 880 body = self._create_event_stream(response, body_shape) 881 final_parsed[payload_member_name] = body 882 elif body_shape.type_name in ['string', 'blob']: 883 # This is a stream 884 body = response['body'] 885 if isinstance(body, bytes): 886 body = body.decode(self.DEFAULT_ENCODING) 887 final_parsed[payload_member_name] = body 888 else: 889 original_parsed = self._initial_body_parse(response['body']) 890 final_parsed[payload_member_name] = self._parse_shape( 891 body_shape, original_parsed) 892 else: 893 original_parsed = self._initial_body_parse(response['body']) 894 body_parsed = self._parse_shape(shape, original_parsed) 895 final_parsed.update(body_parsed) 896 897 def _parse_non_payload_attrs(self, response, shape, 898 member_shapes, final_parsed): 899 headers = response['headers'] 900 for name in member_shapes: 901 member_shape = member_shapes[name] 902 location = member_shape.serialization.get('location') 903 if location is None: 904 continue 905 elif location == 'statusCode': 906 final_parsed[name] = self._parse_shape( 907 member_shape, response['status_code']) 908 elif location == 'headers': 909 final_parsed[name] = self._parse_header_map(member_shape, 910 headers) 911 elif location == 'header': 912 header_name = member_shape.serialization.get('name', name) 913 if header_name in headers: 914 final_parsed[name] = self._parse_shape( 915 member_shape, headers[header_name]) 916 917 def _parse_header_map(self, shape, headers): 918 # Note that headers are case insensitive, so we .lower() 919 # all header names and header prefixes. 920 parsed = {} 921 prefix = shape.serialization.get('name', '').lower() 922 for header_name in headers: 923 if header_name.lower().startswith(prefix): 924 # The key name inserted into the parsed hash 925 # strips off the prefix. 
926 name = header_name[len(prefix):] 927 parsed[name] = headers[header_name] 928 return parsed 929 930 def _initial_body_parse(self, body_contents): 931 # This method should do the initial xml/json parsing of the 932 # body. We we still need to walk the parsed body in order 933 # to convert types, but this method will do the first round 934 # of parsing. 935 raise NotImplementedError("_initial_body_parse") 936 937 def _handle_string(self, shape, value): 938 parsed = value 939 if is_json_value_header(shape): 940 decoded = base64.b64decode(value).decode(self.DEFAULT_ENCODING) 941 parsed = json.loads(decoded) 942 return parsed 943 944 945class RestJSONParser(BaseRestParser, BaseJSONParser): 946 947 EVENT_STREAM_PARSER_CLS = EventStreamJSONParser 948 949 def _initial_body_parse(self, body_contents): 950 return self._parse_body_as_json(body_contents) 951 952 def _do_error_parse(self, response, shape): 953 error = super(RestJSONParser, self)._do_error_parse(response, shape) 954 self._inject_error_code(error, response) 955 return error 956 957 def _inject_error_code(self, error, response): 958 # The "Code" value can come from either a response 959 # header or a value in the JSON body. 
960 body = self._initial_body_parse(response['body']) 961 if 'x-amzn-errortype' in response['headers']: 962 code = response['headers']['x-amzn-errortype'] 963 # Could be: 964 # x-amzn-errortype: ValidationException: 965 code = code.split(':')[0] 966 error['Error']['Code'] = code 967 elif 'code' in body or 'Code' in body: 968 error['Error']['Code'] = body.get( 969 'code', body.get('Code', '')) 970 971 972class RestXMLParser(BaseRestParser, BaseXMLResponseParser): 973 974 EVENT_STREAM_PARSER_CLS = EventStreamXMLParser 975 976 def _initial_body_parse(self, xml_string): 977 if not xml_string: 978 return ETree.Element('') 979 return self._parse_xml_string_to_dom(xml_string) 980 981 def _do_error_parse(self, response, shape): 982 # We're trying to be service agnostic here, but S3 does have a slightly 983 # different response structure for its errors compared to other 984 # rest-xml serivces (route53/cloudfront). We handle this by just 985 # trying to parse both forms. 986 # First: 987 # <ErrorResponse xmlns="..."> 988 # <Error> 989 # <Type>Sender</Type> 990 # <Code>InvalidInput</Code> 991 # <Message>Invalid resource type: foo</Message> 992 # </Error> 993 # <RequestId>request-id</RequestId> 994 # </ErrorResponse> 995 if response['body']: 996 # If the body ends up being invalid xml, the xml parser should not 997 # blow up. It should at least try to pull information about the 998 # the error response from other sources like the HTTP status code. 
999 try: 1000 return self._parse_error_from_body(response) 1001 except ResponseParserError: 1002 LOG.debug( 1003 'Exception caught when parsing error response body:', 1004 exc_info=True) 1005 return self._parse_error_from_http_status(response) 1006 1007 def _parse_error_from_http_status(self, response): 1008 return { 1009 'Error': { 1010 'Code': str(response['status_code']), 1011 'Message': six.moves.http_client.responses.get( 1012 response['status_code'], ''), 1013 }, 1014 'ResponseMetadata': { 1015 'RequestId': response['headers'].get('x-amz-request-id', ''), 1016 'HostId': response['headers'].get('x-amz-id-2', ''), 1017 } 1018 } 1019 1020 def _parse_error_from_body(self, response): 1021 xml_contents = response['body'] 1022 root = self._parse_xml_string_to_dom(xml_contents) 1023 parsed = self._build_name_to_xml_node(root) 1024 self._replace_nodes(parsed) 1025 if root.tag == 'Error': 1026 # This is an S3 error response. First we'll populate the 1027 # response metadata. 1028 metadata = self._populate_response_metadata(response) 1029 # The RequestId and the HostId are already in the 1030 # ResponseMetadata, but are also duplicated in the XML 1031 # body. We don't need these values in both places, 1032 # we'll just remove them from the parsed XML body. 1033 parsed.pop('RequestId', '') 1034 parsed.pop('HostId', '') 1035 return {'Error': parsed, 'ResponseMetadata': metadata} 1036 elif 'RequestId' in parsed: 1037 # Other rest-xml serivces: 1038 parsed['ResponseMetadata'] = {'RequestId': parsed.pop('RequestId')} 1039 default = {'Error': {'Message': '', 'Code': ''}} 1040 merge_dicts(default, parsed) 1041 return default 1042 1043 @_text_content 1044 def _handle_string(self, shape, text): 1045 text = super(RestXMLParser, self)._handle_string(shape, text) 1046 return text 1047 1048 1049PROTOCOL_PARSERS = { 1050 'ec2': EC2QueryParser, 1051 'query': QueryParser, 1052 'json': JSONParser, 1053 'rest-json': RestJSONParser, 1054 'rest-xml': RestXMLParser, 1055} 1056