import _base


class Filter(_base.Filter):
    """Token-stream filter that makes the document declare ``encoding``.

    Existing ``<meta charset=...>`` and
    ``<meta http-equiv="content-type" content=...>`` tokens are rewritten to
    name ``self.encoding``.  If no such meta token has been seen by the time
    the head element is closed (or an empty head element is encountered), a
    ``<meta charset=...>`` token is inserted as the first child of the head.

    When ``encoding`` is ``None`` the stream is passed through unchanged.
    """

    def __init__(self, source, encoding):
        """Wrap ``source`` (an iterable of token dicts) for ``encoding``.

        ``encoding`` is the value written into the meta tokens, or ``None``
        to disable the filter.
        """
        _base.Filter.__init__(self, source)
        self.encoding = encoding

    def _charset_meta(self):
        """Return a fresh ``<meta charset=...>`` EmptyTag token."""
        return {"type": "EmptyTag", "name": u"meta",
                "data": {(None, u"charset"): self.encoding}}

    def _rewrite_meta(self, attrs):
        """Point the attributes of a meta token at ``self.encoding``.

        ``attrs`` maps ``(namespace, name)`` keys to attribute values and is
        modified in place.  Only non-namespaced attributes are considered and
        attribute names are matched case-insensitively.  A ``charset``
        attribute takes priority; otherwise the ``content`` attribute is
        rewritten, but only if ``http-equiv`` is ``content-type``.

        Returns True if an attribute was rewritten, False otherwise.
        """
        is_content_type = False
        content_key = None
        for (namespace, name), value in attrs.items():
            if namespace is not None:
                continue
            lowered = name.lower()
            if lowered == u"charset":
                # Assigning to an existing key does not resize the dict, and
                # we stop iterating immediately afterwards.
                attrs[(namespace, name)] = self.encoding
                return True
            if lowered == u"http-equiv":
                if value.lower() == u"content-type":
                    is_content_type = True
            elif lowered == u"content":
                content_key = (namespace, name)

        if is_content_type and content_key is not None:
            attrs[content_key] = u'text/html; charset=%s' % self.encoding
            return True
        return False

    def __iter__(self):
        """Yield the tokens of the wrapped source with the charset applied.

        Tokens between the head start tag and its end tag are buffered so a
        meta token can be inserted right after the start tag if none was
        found inside the head.
        """
        if self.encoding is None:
            # Nothing to declare: do not inject a meta token and do not
            # overwrite existing declarations with ``None``.
            for token in _base.Filter.__iter__(self):
                yield token
            return

        state = "pre_head"
        meta_found = False
        pending = []

        for token in _base.Filter.__iter__(self):
            token_type = token["type"]
            if token_type == "StartTag":
                if token["name"].lower() == u"head":
                    # The start tag itself is buffered below, so it is always
                    # the first entry of ``pending``.
                    state = "in_head"

            elif token_type == "EmptyTag":
                tag_name = token["name"].lower()
                if tag_name == u"meta":
                    # replace charset with actual encoding
                    if self._rewrite_meta(token["data"]):
                        meta_found = True

                elif tag_name == u"head" and not meta_found:
                    # insert meta into empty head
                    yield {"type": "StartTag", "name": u"head",
                           "data": token["data"]}
                    yield self._charset_meta()
                    yield {"type": "EndTag", "name": u"head"}
                    meta_found = True
                    continue

            elif token_type == "EndTag":
                if token["name"].lower() == u"head" and pending:
                    # insert meta into head (if necessary) and flush pending
                    # queue; the end tag itself is yielded further down.
                    yield pending[0]
                    if not meta_found:
                        yield self._charset_meta()
                    for queued in pending[1:]:
                        yield queued
                    del pending[:]
                    meta_found = True
                    state = "post_head"

            if state == "in_head":
                pending.append(token)
            else:
                yield token

        # The stream ended while the head was still open: emit the buffered
        # tokens unchanged rather than silently dropping them.
        for queued in pending:
            yield queued