1import _base
2
class Filter(_base.Filter):
    """Token filter that makes the document's ``<head>`` declare an encoding.

    Tokens are dicts with a ``"type"`` and ``"name"`` key and, for tags, a
    ``"data"`` dict of attributes keyed by ``(namespace, name)`` tuples.

    While iterating, the filter:

    * rewrites an existing ``<meta charset=...>`` or
      ``<meta http-equiv="content-type" content=...>`` so that it names
      ``self.encoding``;
    * otherwise inserts ``<meta charset=ENCODING>`` directly after the
      ``<head>`` start tag (or expands an empty ``<head/>`` into
      ``<head><meta ...></head>``).

    Tokens inside ``<head>`` are buffered until the head is closed, because
    only then is it known whether a ``<meta>`` has to be inserted.

    If ``encoding`` is ``None`` the stream is passed through unchanged.
    """

    def __init__(self, source, encoding):
        """Wrap ``source`` and remember the encoding to declare.

        :arg source: the token stream to filter; passed unchanged to
            ``_base.Filter.__init__``
        :arg encoding: the encoding name to write into ``<meta>`` tokens, or
            ``None`` to leave the stream untouched
        """
        _base.Filter.__init__(self, source)
        self.encoding = encoding

    def _meta_token(self):
        """Return a fresh ``<meta charset=ENCODING>`` empty-tag token."""
        return {"type": "EmptyTag", "name": u"meta",
                "data": {(None, u"charset"): self.encoding}}

    def _rewrite_meta(self, attrs):
        """Point an existing ``<meta>`` declaration at ``self.encoding``.

        ``attrs`` is the token's ``"data"`` dict and is modified in place.
        A non-namespaced ``charset`` attribute wins; failing that, a
        ``http-equiv="content-type"`` meta has its ``content`` attribute
        replaced.

        :returns: ``True`` if ``attrs`` was rewritten, ``False`` if this
            ``<meta>`` does not declare an encoding
        """
        has_http_equiv_content_type = False
        # .items() (rather than .iteritems()) works on Python 2 and 3 alike.
        for (namespace, name), value in attrs.items():
            if namespace is not None:
                continue
            if name.lower() == u"charset":
                attrs[(namespace, name)] = self.encoding
                return True
            if name == u"http-equiv" and value.lower() == u"content-type":
                has_http_equiv_content_type = True

        if has_http_equiv_content_type and (None, u"content") in attrs:
            attrs[(None, u"content")] = (
                u'text/html; charset=%s' % self.encoding)
            return True
        return False

    def _flush_head(self, pending, meta_found):
        """Yield the buffered ``<head>`` tokens, adding ``<meta>`` if needed.

        The first buffered token is always the ``<head>`` start tag, so the
        new ``<meta>`` is emitted right after it. Yields nothing when
        ``pending`` is empty. ``pending`` itself is not modified.
        """
        if not pending:
            return
        yield pending[0]
        if not meta_found:
            yield self._meta_token()
        for queued in pending[1:]:
            yield queued

    def __iter__(self):
        """Yield the source tokens with the encoding declaration applied."""
        if self.encoding is None:
            # Nothing to declare: do not buffer, inject, or overwrite any
            # existing <meta> with a meaningless "None" value.
            for token in _base.Filter.__iter__(self):
                yield token
            return

        state = "pre_head"
        meta_found = False
        pending = []

        for token in _base.Filter.__iter__(self):
            token_type = token["type"]
            if token_type == "StartTag":
                if token["name"].lower() == u"head":
                    state = "in_head"

            elif token_type == "EmptyTag":
                tag_name = token["name"].lower()
                if tag_name == u"meta":
                    # replace charset with actual encoding
                    if self._rewrite_meta(token["data"]):
                        meta_found = True

                elif tag_name == u"head" and not meta_found:
                    # insert meta into empty head
                    yield {"type": "StartTag", "name": u"head",
                           "data": token["data"]}
                    yield self._meta_token()
                    yield {"type": "EndTag", "name": u"head"}
                    meta_found = True
                    continue

            elif token_type == "EndTag":
                if token["name"].lower() == u"head" and pending:
                    # insert meta into head (if necessary) and flush the
                    # pending queue; the </head> token itself is yielded
                    # below once the state has left "in_head"
                    for queued in self._flush_head(pending, meta_found):
                        yield queued
                    pending = []
                    meta_found = True
                    state = "post_head"

            if state == "in_head":
                pending.append(token)
            else:
                yield token

        # The stream ended inside an unclosed <head>: emit what was buffered
        # instead of silently dropping it.
        for queued in self._flush_head(pending, meta_found):
            yield queued
63