from __future__ import absolute_import, division, unicode_literals

from . import _base


class Filter(_base.Filter):
    """Token-stream filter that makes the document's meta charset match
    a given encoding.

    Tokens are dicts with at least ``"type"`` and ``"name"`` keys; tag
    tokens carry their attributes in ``"data"``, a dict keyed by
    ``(namespace, name)`` tuples.  While iterating, the filter:

    * rewrites an existing ``<meta charset=...>`` or
      ``<meta http-equiv="content-type" content=...>`` so that it names
      ``encoding``;
    * otherwise inserts ``<meta charset=encoding>`` right after the
      ``<head>`` start tag (or expands an empty ``<head/>`` to hold it).

    When ``encoding`` is ``None`` the stream is passed through untouched.
    """

    def __init__(self, source, encoding):
        """Wrap *source* and remember the *encoding* to advertise.

        :arg source: the token iterable handed to ``_base.Filter``
        :arg encoding: the encoding name to write into the meta element,
            or ``None`` to disable the filter
        """
        _base.Filter.__init__(self, source)
        self.encoding = encoding

    def _meta_token(self):
        """Build a fresh ``<meta charset=...>`` EmptyTag token."""
        return {"type": "EmptyTag", "name": "meta",
                "data": {(None, "charset"): self.encoding}}

    def _rewrite_meta(self, attrs):
        """Point an existing meta element's attributes at ``self.encoding``.

        *attrs* is modified in place.  Returns ``True`` when the element
        declared an encoding (and was therefore rewritten), else ``False``.
        """
        has_http_equiv_content_type = False
        for (namespace, name), value in attrs.items():
            if namespace is not None:
                continue
            lowered = name.lower()
            if lowered == "charset":
                # Overwriting an existing key does not resize the dict, and
                # we stop iterating immediately afterwards.
                attrs[(namespace, name)] = self.encoding
                return True
            # Compared case-insensitively, consistent with ``charset`` above.
            if lowered == "http-equiv" and value.lower() == "content-type":
                has_http_equiv_content_type = True

        if has_http_equiv_content_type and (None, "content") in attrs:
            attrs[(None, "content")] = 'text/html; charset=%s' % self.encoding
            return True
        return False

    def _flush_head(self, pending, meta_found):
        """Yield the buffered head tokens, injecting a meta if none was seen.

        *pending* must be non-empty; its first entry is the ``<head>``
        start tag, so the injected meta becomes the head's first child.
        """
        yield pending[0]
        if not meta_found:
            yield self._meta_token()
        for queued in pending[1:]:
            yield queued

    def __iter__(self):
        """Yield the filtered token stream."""
        if self.encoding is None:
            # Nothing to advertise: never inject, and never overwrite an
            # existing declaration with the literal value ``None``.
            for token in _base.Filter.__iter__(self):
                yield token
            return

        state = "pre_head"
        meta_found = False
        # Tokens seen since the <head> start tag; held back until we know
        # whether the head already declares an encoding.
        pending = []

        for token in _base.Filter.__iter__(self):
            token_type = token["type"]
            if token_type == "StartTag":
                if token["name"].lower() == "head":
                    state = "in_head"

            elif token_type == "EmptyTag":
                tag_name = token["name"].lower()
                if tag_name == "meta":
                    # replace charset with actual encoding
                    if self._rewrite_meta(token["data"]):
                        meta_found = True

                elif tag_name == "head" and not meta_found:
                    # insert meta into empty head
                    yield {"type": "StartTag", "name": "head",
                           "data": token["data"]}
                    yield self._meta_token()
                    yield {"type": "EndTag", "name": "head"}
                    meta_found = True
                    continue

            elif token_type == "EndTag":
                if token["name"].lower() == "head" and pending:
                    # insert meta into head (if necessary) and flush
                    # the pending queue
                    for queued in self._flush_head(pending, meta_found):
                        yield queued
                    pending = []
                    meta_found = True
                    state = "post_head"

            if state == "in_head":
                pending.append(token)
            else:
                yield token

        if pending:
            # The stream ended before </head>; emit what was buffered
            # rather than silently dropping it.
            for queued in self._flush_head(pending, meta_found):
                yield queued