1package shared
2
3import (
4	"strings"
5
6	"github.com/mmcdole/gofeed/extensions"
7	"github.com/mmcdole/goxpp"
8)
9
10// IsExtension returns whether or not the current
11// XML element is an extension element (if it has a
12// non empty prefix)
13func IsExtension(p *xpp.XMLPullParser) bool {
14	space := strings.TrimSpace(p.Space)
15	if prefix, ok := p.Spaces[space]; ok {
16		return !(prefix == "" || prefix == "rss" || prefix == "rdf" || prefix == "content")
17	}
18
19	return p.Space != ""
20}
21
22// ParseExtension parses the current element of the
23// XMLPullParser as an extension element and updates
24// the extension map
25func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) {
26	prefix := prefixForNamespace(p.Space, p)
27
28	result, err := parseExtensionElement(p)
29	if err != nil {
30		return nil, err
31	}
32
33	// Ensure the extension prefix map exists
34	if _, ok := fe[prefix]; !ok {
35		fe[prefix] = map[string][]ext.Extension{}
36	}
37	// Ensure the extension element slice exists
38	if _, ok := fe[prefix][p.Name]; !ok {
39		fe[prefix][p.Name] = []ext.Extension{}
40	}
41
42	fe[prefix][p.Name] = append(fe[prefix][p.Name], result)
43	return fe, nil
44}
45
46func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) {
47	if err = p.Expect(xpp.StartTag, "*"); err != nil {
48		return e, err
49	}
50
51	e.Name = p.Name
52	e.Children = map[string][]ext.Extension{}
53	e.Attrs = map[string]string{}
54
55	for _, attr := range p.Attrs {
56		// TODO: Alright that we are stripping
57		// namespace information from attributes ?
58		e.Attrs[attr.Name.Local] = attr.Value
59	}
60
61	for {
62		tok, err := p.Next()
63		if err != nil {
64			return e, err
65		}
66
67		if tok == xpp.EndTag {
68			break
69		}
70
71		if tok == xpp.StartTag {
72			child, err := parseExtensionElement(p)
73			if err != nil {
74				return e, err
75			}
76
77			if _, ok := e.Children[child.Name]; !ok {
78				e.Children[child.Name] = []ext.Extension{}
79			}
80
81			e.Children[child.Name] = append(e.Children[child.Name], child)
82		} else if tok == xpp.Text {
83			e.Value += p.Text
84		}
85	}
86
87	e.Value = strings.TrimSpace(e.Value)
88
89	if err = p.Expect(xpp.EndTag, e.Name); err != nil {
90		return e, err
91	}
92
93	return e, nil
94}
95
96func prefixForNamespace(space string, p *xpp.XMLPullParser) string {
97	// First we check if the global namespace map
98	// contains an entry for this namespace/prefix.
99	// This way we can use the canonical prefix for this
100	// ns instead of the one defined in the feed.
101	if prefix, ok := canonicalNamespaces[space]; ok {
102		return prefix
103	}
104
105	// Next we check if the feed itself defined this
106	// this namespace and return it if we have a result.
107	if prefix, ok := p.Spaces[space]; ok {
108		return prefix
109	}
110
111	// Lastly, any namespace which is not defined in the
112	// the feed will be the prefix itself when using Go's
113	// xml.Decoder.Token() method.
114	return space
115}
116
// canonicalNamespaces maps well-known namespace URIs to the canonical prefix
// used for their extensions. The list is taken from
// github.com/kurtmckee/feedparser and covers many of the popular RSS/Atom
// extensions.
//
// These canonical prefixes override any prefixes used in the feed itself.
// Lookups are plain map lookups and therefore case-sensitive, which is why a
// namespace that is seen in more than one spelling (for example the iTunes
// podcast namespace, with and without capital letters) is listed once per
// spelling.
var canonicalNamespaces = map[string]string{
	"http://webns.net/mvcb/":                                         "admin",
	"http://purl.org/rss/1.0/modules/aggregation/":                   "ag",
	"http://purl.org/rss/1.0/modules/annotate/":                      "annotate",
	"http://media.tangent.org/rss/1.0/":                              "audio",
	"http://backend.userland.com/blogChannelModule":                  "blogChannel",
	"http://creativecommons.org/ns#license":                          "cc",
	"http://web.resource.org/cc/":                                    "cc",
	"http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.html": "creativeCommons",
	"http://backend.userland.com/creativeCommonsRssModule":           "creativeCommons",
	"http://purl.org/rss/1.0/modules/company":                        "co",
	"http://purl.org/rss/1.0/modules/content/":                       "content",
	"http://my.theinfo.org/changed/1.0/rss/":                         "cp",
	"http://purl.org/dc/elements/1.1/":                               "dc",
	"http://purl.org/dc/terms/":                                      "dcterms",
	"http://purl.org/rss/1.0/modules/email/":                         "email",
	"http://purl.org/rss/1.0/modules/event/":                         "ev",
	"http://rssnamespace.org/feedburner/ext/1.0":                     "feedburner",
	"http://freshmeat.net/rss/fm/":                                   "fm",
	"http://xmlns.com/foaf/0.1/":                                     "foaf",
	"http://www.w3.org/2003/01/geo/wgs84_pos#":                       "geo",
	"http://www.georss.org/georss":                                   "georss",
	"http://www.opengis.net/gml":                                     "gml",
	"http://postneo.com/icbm/":                                       "icbm",
	"http://purl.org/rss/1.0/modules/image/":                         "image",
	"http://www.itunes.com/DTDs/PodCast-1.0.dtd":                     "itunes",
	"http://www.itunes.com/dtds/podcast-1.0.dtd":                     "itunes",
	"http://example.com/DTDs/PodCast-1.0.dtd":                        "itunes",
	"http://purl.org/rss/1.0/modules/link/":                          "l",
	"http://search.yahoo.com/mrss":                                   "media",
	"http://search.yahoo.com/mrss/":                                  "media",
	"http://madskills.com/public/xml/rss/module/pingback/":           "pingback",
	"http://prismstandard.org/namespaces/1.2/basic/":                 "prism",
	"http://www.w3.org/1999/02/22-rdf-syntax-ns#":                    "rdf",
	"http://www.w3.org/2000/01/rdf-schema#":                          "rdfs",
	"http://purl.org/rss/1.0/modules/reference/":                     "ref",
	"http://purl.org/rss/1.0/modules/richequiv/":                     "reqv",
	"http://purl.org/rss/1.0/modules/search/":                        "search",
	"http://purl.org/rss/1.0/modules/slash/":                         "slash",
	"http://schemas.xmlsoap.org/soap/envelope/":                      "soap",
	"http://purl.org/rss/1.0/modules/servicestatus/":                 "ss",
	"http://hacks.benhammersley.com/rss/streaming/":                  "str",
	"http://purl.org/rss/1.0/modules/subscription/":                  "sub",
	"http://purl.org/rss/1.0/modules/syndication/":                   "sy",
	"http://schemas.pocketsoap.com/rss/myDescModule/":                "szf",
	"http://purl.org/rss/1.0/modules/taxonomy/":                      "taxo",
	"http://purl.org/rss/1.0/modules/threading/":                     "thr",
	"http://purl.org/rss/1.0/modules/textinput/":                     "ti",
	"http://madskills.com/public/xml/rss/module/trackback/":          "trackback",
	"http://wellformedweb.org/commentAPI/":                           "wfw",
	"http://purl.org/rss/1.0/modules/wiki/":                          "wiki",
	"http://www.w3.org/1999/xhtml":                                   "xhtml",
	"http://www.w3.org/1999/xlink":                                   "xlink",
	"http://www.w3.org/XML/1998/namespace":                           "xml",
	"http://podlove.org/simple-chapters":                             "psc",
}
177