1 #include "rss/parser.h"
2 
3 #include "3rd-party/catch.hpp"
4 #include "rss/exception.h"
5 #include "test-helpers/exceptionwithmsg.h"
6 
7 TEST_CASE("Throws exception if file doesn't exist", "[rsspp::Parser]")
8 {
9 	using TestHelpers::ExceptionWithMsg;
10 
11 	rsspp::Parser p;
12 
13 	REQUIRE_THROWS_MATCHES(p.parse_file("data/non-existent.xml"),
14 		rsspp::Exception,
15 		ExceptionWithMsg<rsspp::Exception>("could not parse file"));
16 }
17 
18 TEST_CASE("Throws exception if file can't be parsed", "[rsspp::Parser]")
19 {
20 	using TestHelpers::ExceptionWithMsg;
21 
22 	rsspp::Parser p;
23 
24 	REQUIRE_THROWS_MATCHES(p.parse_file("data/empty.xml"),
25 		rsspp::Exception,
26 		ExceptionWithMsg<rsspp::Exception>("could not parse file"));
27 }
28 
29 TEST_CASE("Extracts data from RSS 0.91", "[rsspp::Parser]")
30 {
31 	rsspp::Parser p;
32 	rsspp::Feed f;
33 
34 	REQUIRE_NOTHROW(f = p.parse_file("data/rss091_1.xml"));
35 
36 	REQUIRE(f.rss_version == rsspp::Feed::RSS_0_91);
37 	REQUIRE(f.title == "Example Channel");
38 	REQUIRE(f.description == "an example feed");
39 	REQUIRE(f.link == "http://example.com/");
40 	REQUIRE(f.language == "en");
41 
42 	REQUIRE(f.items.size() == 1u);
43 	REQUIRE(f.items[0].title == "1 < 2");
44 	REQUIRE(f.items[0].link == "http://example.com/1_less_than_2.html");
45 	REQUIRE(f.items[0].description ==
46 		"1 < 2, 3 < 4.\nIn HTML, <b> starts a bold phrase\nand you "
47 		"start a link with <a href=\n");
48 	REQUIRE(f.items[0].author == "");
49 	REQUIRE(f.items[0].guid == "");
50 }
51 
52 TEST_CASE("Doesn't crash or garble data if an item in RSS 0.9x contains "
53 	"an empty author tag",
54 	"[rsspp::Parser][issue542]")
55 {
56 	rsspp::Parser p;
57 	rsspp::Feed f;
58 
__anon7f2568d60102() 59 	const auto check = [&]() {
60 		REQUIRE(f.title == "A Channel with Unnamed Authors");
61 		REQUIRE(f.description == "an example feed");
62 		REQUIRE(f.link == "http://example.com/");
63 		REQUIRE(f.language == "en");
64 
65 		REQUIRE(f.items.size() == 2u);
66 
67 		REQUIRE(f.items[0].title == "This one has en empty author tag");
68 		REQUIRE(f.items[0].link == "http://example.com/test_1.html");
69 		REQUIRE(f.items[0].description == "It doesn't matter.");
70 		REQUIRE(f.items[0].author == "");
71 		REQUIRE(f.items[0].guid == "");
72 
73 		REQUIRE(f.items[1].title == "This one has en empty author tag as well");
74 		REQUIRE(f.items[1].link == "http://example.com/test_2.html");
75 		REQUIRE(f.items[1].description == "Non-empty description though.");
76 		REQUIRE(f.items[1].author == "");
77 		REQUIRE(f.items[1].guid == "");
78 	};
79 
80 	SECTION("RSS 0.91") {
81 		REQUIRE_NOTHROW(f = p.parse_file("data/rss_091_with_empty_author.xml"));
82 		REQUIRE(f.rss_version == rsspp::Feed::RSS_0_91);
83 		check();
84 	}
85 
86 	SECTION("RSS 0.92") {
87 		REQUIRE_NOTHROW(f = p.parse_file("data/rss_092_with_empty_author.xml"));
88 		REQUIRE(f.rss_version == rsspp::Feed::RSS_0_92);
89 		check();
90 	}
91 
92 	SECTION("RSS 0.94") {
93 		REQUIRE_NOTHROW(f = p.parse_file("data/rss_094_with_empty_author.xml"));
94 		REQUIRE(f.rss_version == rsspp::Feed::RSS_0_94);
95 		check();
96 	}
97 }
98 
99 TEST_CASE("Extracts data from RSS 0.92", "[rsspp::Parser]")
100 {
101 	rsspp::Parser p;
102 	rsspp::Feed f;
103 
104 	REQUIRE_NOTHROW(f = p.parse_file("data/rss092_1.xml"));
105 
106 	REQUIRE(f.rss_version == rsspp::Feed::RSS_0_92);
107 	REQUIRE(f.title == "Example Channel");
108 	REQUIRE(f.description == "an example feed");
109 	REQUIRE(f.link == "http://example.com/");
110 	REQUIRE(f.language == "en");
111 
112 	REQUIRE(f.items.size() == 3u);
113 
114 	REQUIRE(f.items[0].title == "1 < 2");
115 	REQUIRE(f.items[0].link == "http://example.com/1_less_than_2.html");
116 	REQUIRE(f.items[0].base == "http://example.com/feed/rss_testing.html");
117 
118 	REQUIRE(f.items[1].title == "A second item");
119 	REQUIRE(f.items[1].link == "http://example.com/a_second_item.html");
120 	REQUIRE(f.items[1].description == "no description");
121 	REQUIRE(f.items[1].author == "");
122 	REQUIRE(f.items[1].guid == "");
123 	REQUIRE(f.items[1].base == "http://example.com/item/rss_testing.html");
124 
125 	REQUIRE(f.items[2].title == "A third item");
126 	REQUIRE(f.items[2].link == "http://example.com/a_third_item.html");
127 	REQUIRE(f.items[2].description == "no description");
128 	REQUIRE(f.items[2].base == "http://example.com/desc/rss_testing.html");
129 }
130 
131 TEST_CASE("Extracts data fro RSS 2.0", "[rsspp::Parser]")
132 {
133 	rsspp::Parser p;
134 	rsspp::Feed f;
135 
136 	REQUIRE_NOTHROW(f = p.parse_file("data/rss20_1.xml"));
137 
138 	REQUIRE(f.title == "my weblog");
139 	REQUIRE(f.link == "http://example.com/blog/");
140 	REQUIRE(f.description == "my description");
141 
142 	REQUIRE(f.items.size() == 1u);
143 
144 	REQUIRE(f.items[0].title == "this is an item");
145 	REQUIRE(f.items[0].link ==
146 		"http://example.com/blog/this_is_an_item.html");
147 	REQUIRE(f.items[0].author == "Andreas Krennmair");
148 	REQUIRE(f.items[0].author_email == "blog@synflood.at");
149 	REQUIRE(f.items[0].content_encoded == "oh well, this is the content.");
150 	REQUIRE(f.items[0].pubDate == "Fri, 12 Dec 2008 02:36:10 +0100");
151 	REQUIRE(f.items[0].guid ==
152 		"http://example.com/blog/this_is_an_item.html");
153 	REQUIRE_FALSE(f.items[0].guid_isPermaLink);
154 }
155 
156 TEST_CASE("Extracts data from RSS 1.0", "[rsspp::Parser]")
157 {
158 	rsspp::Parser p;
159 	rsspp::Feed f;
160 
161 	REQUIRE_NOTHROW(f = p.parse_file("data/rss10_1.xml"));
162 
163 	REQUIRE(f.rss_version == rsspp::Feed::RSS_1_0);
164 
165 	REQUIRE(f.title == "Example Dot Org");
166 	REQUIRE(f.link == "http://www.example.org");
167 	REQUIRE(f.description == "the Example Organization web site");
168 
169 	REQUIRE(f.items.size() == 1u);
170 
171 	REQUIRE(f.items[0].title == "New Status Updates");
172 	REQUIRE(f.items[0].link == "http://www.example.org/status/foo");
173 	REQUIRE(f.items[0].guid == "http://www.example.org/status/");
174 	REQUIRE(f.items[0].description == "News about the Example project");
175 	REQUIRE(f.items[0].pubDate == "Tue, 30 Dec 2008 07:20:00 +0000");
176 }
177 
178 TEST_CASE("Extracts data from Atom 1.0", "[rsspp::Parser]")
179 {
180 	rsspp::Parser p;
181 	rsspp::Feed f;
182 
183 	REQUIRE_NOTHROW(f = p.parse_file("data/atom10_1.xml"));
184 
185 	REQUIRE(f.rss_version == rsspp::Feed::ATOM_1_0);
186 
187 	REQUIRE(f.title == "test atom");
188 	REQUIRE(f.title_type == "text");
189 	REQUIRE(f.description == "atom description!");
190 	REQUIRE(f.pubDate == "Tue, 30 Dec 2008 18:26:15 +0000");
191 	REQUIRE(f.link == "http://example.com/");
192 
193 	REQUIRE(f.items.size() == 3u);
194 	REQUIRE(f.items[0].title == "A gentle introduction to Atom testing");
195 	REQUIRE(f.items[0].title_type == "html");
196 	REQUIRE(f.items[0].link == "http://example.com/atom_testing.html");
197 	REQUIRE(f.items[0].guid == "tag:example.com,2008-12-30:/atom_testing");
198 	REQUIRE(f.items[0].description == "some content");
199 	REQUIRE(f.items[0].base == "http://example.com/feed/atom_testing.html");
200 	REQUIRE(f.items[0].author == "A Person");
201 
202 	REQUIRE(f.items[1].title == "A missing rel attribute");
203 	REQUIRE(f.items[1].title_type == "html");
204 	REQUIRE(f.items[1].link == "http://example.com/atom_testing.html");
205 	REQUIRE(f.items[1].guid == "tag:example.com,2008-12-30:/atom_testing1");
206 	REQUIRE(f.items[1].description == "some content");
207 	REQUIRE(f.items[1].base ==
208 		"http://example.com/entry/atom_testing.html");
209 	REQUIRE(f.items[1].author == "A different Person");
210 
211 	REQUIRE(f.items[2].title == "alternate link isn't first");
212 	REQUIRE(f.items[2].title_type == "html");
213 	REQUIRE(f.items[2].link == "http://example.com/atom_testing.html");
214 	REQUIRE(f.items[2].guid == "tag:example.com,2008-12-30:/atom_testing2");
215 	REQUIRE(f.items[2].description == "some content");
216 	REQUIRE(f.items[2].base ==
217 		"http://example.com/content/atom_testing.html");
218 	REQUIRE(f.items[2].author == "Person A, Person B");
219 }
220 
221 TEST_CASE("Extracts data from media:... tags in atom feed", "[rsspp::Parser]")
222 {
223 	rsspp::Parser p;
224 	rsspp::Feed f;
225 
226 	REQUIRE_NOTHROW(f = p.parse_file("data/atom10_2.xml"));
227 
228 	REQUIRE(f.rss_version == rsspp::Feed::ATOM_1_0);
229 
230 	REQUIRE(f.title == "Media test feed");
231 	REQUIRE(f.title_type == "text");
232 	REQUIRE(f.pubDate == "Tue, 30 Dec 2008 18:26:15 +0000");
233 	REQUIRE(f.link == "http://example.com/");
234 
235 	REQUIRE(f.items.size() == 5u);
236 	REQUIRE(f.items[0].title == "using regular content");
237 	REQUIRE(f.items[0].description == "regular html content");
238 	REQUIRE(f.items[0].description_mime_type == "text/html");
239 
240 	REQUIRE(f.items[1].title == "using media:description");
241 	REQUIRE(f.items[1].description == "media plaintext content");
242 	REQUIRE(f.items[1].description_mime_type == "text/plain");
243 
244 	REQUIRE(f.items[2].title == "using multiple media tags");
245 	REQUIRE(f.items[2].description == "media html content");
246 	REQUIRE(f.items[2].description_mime_type == "text/html");
247 	REQUIRE(f.items[2].link == "http://example.com/player.html");
248 
249 	REQUIRE(f.items[3].title ==
250 		"using multiple media tags nested in group/content");
251 	REQUIRE(f.items[3].description == "nested media html content");
252 	REQUIRE(f.items[3].description_mime_type == "text/html");
253 	REQUIRE(f.items[3].link == "http://example.com/player.html");
254 
255 	SECTION("media:{title,description,player} does not overwrite regular title, description, and link if they exist") {
256 		REQUIRE(f.items[4].title == "regular title");
257 		REQUIRE(f.items[4].description == "regular content");
258 		REQUIRE(f.items[4].description_mime_type == "text/html");
259 		REQUIRE(f.items[4].link == "http://example.com/regular-link");
260 	}
261 }
262 
263 TEST_CASE("Extracts data from media:... tags in  RSS 2.0 feeds",
264 	"[rsspp::Parser]")
265 {
266 	rsspp::Parser p;
267 	rsspp::Feed f;
268 
269 	REQUIRE_NOTHROW(f = p.parse_file("data/rss20_2.xml"));
270 
271 	REQUIRE(f.title == "my weblog");
272 	REQUIRE(f.link == "http://example.com/blog/");
273 	REQUIRE(f.description == "my description");
274 
275 	REQUIRE(f.items.size() == 2u);
276 
277 	REQUIRE(f.items[0].title == "using multiple media tags");
278 	REQUIRE(f.items[0].description == "media html content");
279 	REQUIRE(f.items[0].description_mime_type == "text/html");
280 	REQUIRE(f.items[0].link == "http://example.com/player.html");
281 
282 	REQUIRE(f.items[1].title ==
283 		"using multiple media tags nested in group/content");
284 	REQUIRE(f.items[1].description == "nested media html content");
285 	REQUIRE(f.items[1].description_mime_type == "text/html");
286 	REQUIRE(f.items[1].link == "http://example.com/player.html");
287 }
288