1package shared 2 3import ( 4 "strings" 5 6 "github.com/mmcdole/gofeed/extensions" 7 "github.com/mmcdole/goxpp" 8) 9 10// IsExtension returns whether or not the current 11// XML element is an extension element (if it has a 12// non empty prefix) 13func IsExtension(p *xpp.XMLPullParser) bool { 14 space := strings.TrimSpace(p.Space) 15 if prefix, ok := p.Spaces[space]; ok { 16 return !(prefix == "" || prefix == "rss" || prefix == "rdf" || prefix == "content") 17 } 18 19 return p.Space != "" 20} 21 22// ParseExtension parses the current element of the 23// XMLPullParser as an extension element and updates 24// the extension map 25func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) { 26 prefix := prefixForNamespace(p.Space, p) 27 28 result, err := parseExtensionElement(p) 29 if err != nil { 30 return nil, err 31 } 32 33 // Ensure the extension prefix map exists 34 if _, ok := fe[prefix]; !ok { 35 fe[prefix] = map[string][]ext.Extension{} 36 } 37 // Ensure the extension element slice exists 38 if _, ok := fe[prefix][p.Name]; !ok { 39 fe[prefix][p.Name] = []ext.Extension{} 40 } 41 42 fe[prefix][p.Name] = append(fe[prefix][p.Name], result) 43 return fe, nil 44} 45 46func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) { 47 if err = p.Expect(xpp.StartTag, "*"); err != nil { 48 return e, err 49 } 50 51 e.Name = p.Name 52 e.Children = map[string][]ext.Extension{} 53 e.Attrs = map[string]string{} 54 55 for _, attr := range p.Attrs { 56 // TODO: Alright that we are stripping 57 // namespace information from attributes ? 
58 e.Attrs[attr.Name.Local] = attr.Value 59 } 60 61 for { 62 tok, err := p.Next() 63 if err != nil { 64 return e, err 65 } 66 67 if tok == xpp.EndTag { 68 break 69 } 70 71 if tok == xpp.StartTag { 72 child, err := parseExtensionElement(p) 73 if err != nil { 74 return e, err 75 } 76 77 if _, ok := e.Children[child.Name]; !ok { 78 e.Children[child.Name] = []ext.Extension{} 79 } 80 81 e.Children[child.Name] = append(e.Children[child.Name], child) 82 } else if tok == xpp.Text { 83 e.Value += p.Text 84 } 85 } 86 87 e.Value = strings.TrimSpace(e.Value) 88 89 if err = p.Expect(xpp.EndTag, e.Name); err != nil { 90 return e, err 91 } 92 93 return e, nil 94} 95 96func prefixForNamespace(space string, p *xpp.XMLPullParser) string { 97 // First we check if the global namespace map 98 // contains an entry for this namespace/prefix. 99 // This way we can use the canonical prefix for this 100 // ns instead of the one defined in the feed. 101 if prefix, ok := canonicalNamespaces[space]; ok { 102 return prefix 103 } 104 105 // Next we check if the feed itself defined this 106 // this namespace and return it if we have a result. 107 if prefix, ok := p.Spaces[space]; ok { 108 return prefix 109 } 110 111 // Lastly, any namespace which is not defined in the 112 // the feed will be the prefix itself when using Go's 113 // xml.Decoder.Token() method. 114 return space 115} 116 117// Namespaces taken from github.com/kurtmckee/feedparser 118// These are used for determining canonical name space prefixes 119// for many of the popular RSS/Atom extensions. 120// 121// These canonical prefixes override any prefixes used in the feed itself. 
var canonicalNamespaces = map[string]string{
	// Keys are namespace URIs; values are the prefixes they map to.
	"http://webns.net/mvcb/":                                         "admin",
	"http://purl.org/rss/1.0/modules/aggregation/":                   "ag",
	"http://purl.org/rss/1.0/modules/annotate/":                      "annotate",
	"http://media.tangent.org/rss/1.0/":                              "audio",
	"http://backend.userland.com/blogChannelModule":                  "blogChannel",
	"http://creativecommons.org/ns#license":                          "cc",
	"http://web.resource.org/cc/":                                    "cc",
	"http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.html": "creativeCommons",
	"http://backend.userland.com/creativeCommonsRssModule":           "creativeCommons",
	"http://purl.org/rss/1.0/modules/company":                        "co",
	"http://purl.org/rss/1.0/modules/content/":                       "content",
	"http://my.theinfo.org/changed/1.0/rss/":                         "cp",
	"http://purl.org/dc/elements/1.1/":                               "dc",
	"http://purl.org/dc/terms/":                                      "dcterms",
	"http://purl.org/rss/1.0/modules/email/":                         "email",
	"http://purl.org/rss/1.0/modules/event/":                         "ev",
	"http://rssnamespace.org/feedburner/ext/1.0":                     "feedburner",
	"http://freshmeat.net/rss/fm/":                                   "fm",
	"http://xmlns.com/foaf/0.1/":                                     "foaf",
	"http://www.w3.org/2003/01/geo/wgs84_pos#":                       "geo",
	"http://www.georss.org/georss":                                   "georss",
	"http://www.opengis.net/gml":                                     "gml",
	"http://postneo.com/icbm/":                                       "icbm",
	"http://purl.org/rss/1.0/modules/image/":                         "image",
	"http://www.itunes.com/DTDs/PodCast-1.0.dtd":                     "itunes",
	"http://example.com/DTDs/PodCast-1.0.dtd":                        "itunes",
	"http://purl.org/rss/1.0/modules/link/":                          "l",
	"http://search.yahoo.com/mrss":                                   "media",
	"http://search.yahoo.com/mrss/":                                  "media",
	"http://madskills.com/public/xml/rss/module/pingback/":           "pingback",
	"http://prismstandard.org/namespaces/1.2/basic/":                 "prism",
	"http://www.w3.org/1999/02/22-rdf-syntax-ns#":                    "rdf",
	"http://www.w3.org/2000/01/rdf-schema#":                          "rdfs",
	"http://purl.org/rss/1.0/modules/reference/":                     "ref",
	"http://purl.org/rss/1.0/modules/richequiv/":                     "reqv",
	"http://purl.org/rss/1.0/modules/search/":                        "search",
	"http://purl.org/rss/1.0/modules/slash/":                         "slash",
	"http://schemas.xmlsoap.org/soap/envelope/":                      "soap",
	"http://purl.org/rss/1.0/modules/servicestatus/":                 "ss",
	"http://hacks.benhammersley.com/rss/streaming/":                  "str",
	"http://purl.org/rss/1.0/modules/subscription/":                  "sub",
	"http://purl.org/rss/1.0/modules/syndication/":                   "sy",
	"http://schemas.pocketsoap.com/rss/myDescModule/":                "szf",
	"http://purl.org/rss/1.0/modules/taxonomy/":                      "taxo",
	"http://purl.org/rss/1.0/modules/threading/":                     "thr",
	"http://purl.org/rss/1.0/modules/textinput/":                     "ti",
	"http://madskills.com/public/xml/rss/module/trackback/":          "trackback",
	"http://wellformedweb.org/commentAPI/":                           "wfw",
	"http://purl.org/rss/1.0/modules/wiki/":                          "wiki",
	"http://www.w3.org/1999/xhtml":                                   "xhtml",
	"http://www.w3.org/1999/xlink":                                   "xlink",
	"http://www.w3.org/XML/1998/namespace":                           "xml",
	"http://podlove.org/simple-chapters":                             "psc",
}