"""Tree walker over ElementTree-style trees, built per ElementTree implementation."""
from __future__ import absolute_import, division, unicode_literals

from collections import OrderedDict
import re

from six import string_types

from . import base
from .._utils import moduleFactoryFactory

# Splits a "{namespace}local" name into its namespace and local parts.
tag_regexp = re.compile("{([^}]*)}(.*)")


def getETreeBuilder(ElementTreeImplementation):
    """Create the tree-walker names for one ElementTree implementation.

    :arg ElementTreeImplementation: an object exposing the ElementTree API
        (only ``Comment`` is used directly here).

    :returns: the ``locals()`` mapping of this function, i.e. every name
        bound in this function body, including the ``TreeWalker`` class.
    """
    # NOTE: the return value is locals(), so every name bound directly in
    # this function body becomes part of the returned mapping.
    ElementTree = ElementTreeImplementation
    # Tag carried by comment nodes; obtained from a throwaway comment so that
    # comments can be recognised by comparing tags in getNodeDetails.
    ElementTreeCommentType = ElementTree.Comment("asd").tag

    class TreeWalker(base.NonRecursiveTreeWalker):  # pylint:disable=unused-variable
        """Non-recursive walker for the given ElementTree representation.

        To avoid recursion, "nodes" handed out by this walker are 4-tuples:

        1. The current element

        2. The index of that element within its parent

        3. A stack (list) of ancestor elements

        4. A flag, "text", "tail" or None; when set, the node stands for the
           text or the tail of the current element (1) rather than for the
           element itself

        The node the walk starts from may be passed in as a bare object
        instead of a tuple; every method accepts both forms.
        """
        def getNodeDetails(self, node):
            """Return the ``base`` node-type tuple describing ``node``."""
            if isinstance(node, tuple):  # otherwise it may be the root Element
                elt, _, _, flag = node
                if flag in ("text", "tail"):
                    return base.TEXT, getattr(elt, flag)
                node = elt

            # Objects without a "tag" are unwrapped through getroot().
            if not hasattr(node, "tag"):
                node = node.getroot()

            if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
                return (base.DOCUMENT,)

            if node.tag == "<!DOCTYPE>":
                return (base.DOCTYPE, node.text,
                        node.get("publicId"), node.get("systemId"))

            if node.tag == ElementTreeCommentType:
                return base.COMMENT, node.text

            # Anything left is assumed to be an ordinary element.
            assert isinstance(node.tag, string_types), type(node.tag)
            tag_match = tag_regexp.match(node.tag)
            namespace, tag = tag_match.groups() if tag_match else (None, node.tag)

            # Attributes are keyed by (namespace, local name), in source order.
            attrs = OrderedDict()
            for attr_name, attr_value in node.attrib.items():
                attr_match = tag_regexp.match(attr_name)
                attr_key = attr_match.groups() if attr_match else (None, attr_name)
                attrs[attr_key] = attr_value

            # Last item is truthy when the element has children or text.
            return (base.ELEMENT, namespace, tag,
                    attrs, len(node) or node.text)

        def getFirstChild(self, node):
            """Return the first child node of ``node``, or None if it has none."""
            if not isinstance(node, tuple):
                # A bare starting node: no index, fresh ancestor stack, no flag.
                node = (node, None, [], None)
            element, key, parents, flag = node

            # Text and tail pseudo-nodes never have children.
            if flag in ("text", "tail"):
                return None

            # The element's own text comes before any child element.
            if element.text:
                return element, key, parents, "text"

            if len(element):
                parents.append(element)
                return element[0], 0, parents, None

            return None

        def getNextSibling(self, node):
            """Return the node following ``node`` at the same level, or None."""
            if not isinstance(node, tuple):
                return None
            element, key, parents, flag = node

            if flag == "text":
                # After an element's text comes its first child element, if any.
                if not len(element):
                    return None
                parents.append(element)
                return element[0], 0, parents, None

            # An element is followed by its tail (unless we are on the tail)...
            if element.tail and flag != "tail":
                return element, key, parents, "tail"

            # ...and then by the next element under the same parent.
            siblings = parents[-1]
            if key < len(siblings) - 1:
                return siblings[key + 1], key + 1, parents, None

            return None

        def getParentNode(self, node):
            """Return the parent node of ``node``, or None for a bare node."""
            if not isinstance(node, tuple):
                return None
            element, key, parents, flag = node

            if flag == "text":
                # The parent of a text node is the element that owns the text;
                # with no ancestors recorded it is returned in bare form.
                return element if not parents else (element, key, parents, None)

            # Going up removes the parent from the shared ancestor stack.
            parent = parents.pop()
            if not parents:
                return parent

            # Recover the parent's index among its own siblings.
            parent_siblings = list(parents[-1])
            assert parent_siblings.count(parent) == 1
            return parent, parent_siblings.index(parent), parents, None

    return locals()


# presumably wraps the builder so its names are exposed as a module per
# ElementTree implementation -- confirm against moduleFactoryFactory
getETreeModule = moduleFactoryFactory(getETreeBuilder)