"""
Legacy module - don't use in new code!

html5lib now has its own proper implementation.

This module implements a tree builder for html5lib that generates lxml
html element trees.  This module uses camelCase as it follows the
html5lib style guide.
"""

from html5lib.treebuilders import etree as etree_builders
try:
    from html5lib.treebuilders import _base
except ImportError:
    # Newer html5lib releases expose the same module without the
    # underscore prefix; fall back so this legacy builder stays importable.
    from html5lib.treebuilders import base as _base
from lxml import html, etree


class DocumentType(object):
    """Plain record of the values passed to ``insertDoctype``."""

    def __init__(self, name, publicId, systemId):
        self.name = name
        self.publicId = publicId
        self.systemId = systemId


class Document(object):
    """Document node that wraps an lxml element tree.

    ``_elementTree`` is ``None`` until ``TreeBuilder.insertRoot`` assigns
    the tree obtained from the parsed root element.
    """

    def __init__(self):
        self._elementTree = None
        self.childNodes = []

    def appendChild(self, element):
        """Attach *element* as a sibling following the root element.

        *element* is a builder node; its underlying lxml node is read
        from ``element._element``.
        """
        self._elementTree.getroot().addnext(element._element)


class TreeBuilder(_base.TreeBuilder):
    """html5lib tree builder whose elements come from ``lxml.html``."""

    documentClass = Document
    doctypeClass = DocumentType
    # elementClass / commentClass are filled in per instance by __init__.
    elementClass = None
    commentClass = None
    fragmentClass = Document

    def __init__(self, *args, **kwargs):
        """Pick the element and comment classes, then run the base init.

        All arguments are forwarded unchanged to
        ``_base.TreeBuilder.__init__``.
        """
        html_builder = etree_builders.getETreeModule(html, fullTree=False)
        etree_builder = etree_builders.getETreeModule(etree, fullTree=False)
        self.elementClass = html_builder.Element
        self.commentClass = etree_builder.Comment
        _base.TreeBuilder.__init__(self, *args, **kwargs)

    def reset(self):
        """Reset base state and this builder's own bookkeeping."""
        _base.TreeBuilder.reset(self)
        self.rootInserted = False
        # Comment data seen before the root exists; applied in insertRoot.
        self.initialComments = []
        self.doctype = None

    def getDocument(self):
        """Return the lxml element tree stored on the document."""
        return self.document._elementTree

    def getFragment(self):
        """Return the content of the first open element as a list.

        The list holds the element's text (if non-empty), then its child
        elements, then its tail (if non-empty).
        """
        element = self.openElements[0]._element
        fragment = []
        if element.text:
            fragment.append(element.text)
        # list(element) replaces the deprecated element.getchildren().
        fragment.extend(list(element))
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, name, publicId, systemId):
        """Remember the doctype so insertRoot can emit it later."""
        self.doctype = self.doctypeClass(name, publicId, systemId)

    def insertComment(self, data, parent=None):
        """Insert a comment, deferring it while no root element exists.

        Comments arriving before the root is inserted are queued in
        ``initialComments``; afterwards the base implementation is used.
        """
        if not self.rootInserted:
            self.initialComments.append(data)
        else:
            _base.TreeBuilder.insertComment(self, data, parent)

    def insertRoot(self, name):
        """Create the document and its root element.

        An ``<html>`` root is parsed with ``lxml.html.fromstring``,
        preceded by a DOCTYPE declaration when a named doctype was
        recorded.  Queued initial comments are added before the root, and
        the root is registered in ``document.childNodes`` and
        ``openElements``.
        """
        buf = []
        doctype = self.doctype
        if doctype and doctype.name:
            buf.append('<!DOCTYPE %s' % doctype.name)
            if doctype.publicId is not None or doctype.systemId is not None:
                # Only one of the two identifiers may be set; substitute an
                # empty string for the missing one so the literal text
                # "None" never ends up in the DOCTYPE.
                buf.append(' PUBLIC "%s" "%s"' % (doctype.publicId or '',
                                                  doctype.systemId or ''))
            buf.append('>')
        buf.append('<html></html>')
        root = html.fromstring(''.join(buf))

        # Append the initial comments:
        for comment in self.initialComments:
            root.addprevious(etree.Comment(comment))

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name)
        root_element._element = root
        self.document.childNodes.append(root_element)
        self.openElements.append(root_element)

        self.rootInserted = True