1 /*
2 * Copyright (C) 2006 Andrej Kacian <andrej@kacian.sk>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
18 */
19 #define __USE_GNU
20
21 #include <glib.h>
22 #include <expat.h>
23 #include <string.h>
24 #include <stdio.h>
25
26 #include <procheader.h>
27
28 #include "feed.h"
29 #include "feeditem.h"
30 #include "date.h"
31 #include "parser.h"
32 #include "parser_atom10.h"
33
/*
 * Positions inside an Atom 1.0 document that the element handlers in this
 * file record in ctx->location while parsing.
 *
 * Note: because the enum has no tag and no typedef, "FeedAtom10Locations"
 * is the name of a file-scope variable of this anonymous enum type, not a
 * type name.
 */
enum {
	FEED_LOC_ATOM10_NONE    = 0,	/* not inside any tracked element */
	FEED_LOC_ATOM10_ENTRY   = 1,	/* inside <entry> */
	FEED_LOC_ATOM10_AUTHOR  = 2,	/* inside <author> */
	FEED_LOC_ATOM10_SOURCE  = 3,	/* inside <source> of an entry */
	FEED_LOC_ATOM10_CONTENT = 4	/* inside <content> of an entry */
} FeedAtom10Locations;
41
feed_parser_atom10_start(void * data,const gchar * el,const gchar ** attr)42 void feed_parser_atom10_start(void *data, const gchar *el, const gchar **attr)
43 {
44 FeedParserCtx *ctx = (FeedParserCtx *)data;
45 gchar *a = NULL;
46
47 if( ctx->depth == 1 ) {
48
49 if( !strcmp(el, "entry") ) {
50 /* Start of new feed item found.
51 * Create a new FeedItem, freeing the one we already have, if any. */
52 if( ctx->curitem != NULL )
53 feed_item_free(ctx->curitem);
54 ctx->curitem = feed_item_new(ctx->feed);
55 ctx->location = FEED_LOC_ATOM10_ENTRY;
56 } else if( !strcmp(el, "author") ) {
57 /* Start of author info for the feed found.
58 * Set correct location. */
59 ctx->location = FEED_LOC_ATOM10_AUTHOR;
60 } else if( !strcmp(el, "link") ) {
61 if (!feed_parser_get_attribute_value(attr, "rel")) {
62 /* Link tag for the feed */
63 g_free(ctx->feed->link);
64 ctx->feed->link =
65 g_strdup(feed_parser_get_attribute_value(attr, "href"));
66 }
67 } else ctx->location = FEED_LOC_ATOM10_NONE;
68
69 } else if( ctx->depth == 2 ) {
70
71 /* Make sure we are in one of known locations within the XML structure.
72 * This condition should never be true on a valid Atom feed. */
73 if (ctx->location != FEED_LOC_ATOM10_AUTHOR &&
74 ctx->location != FEED_LOC_ATOM10_ENTRY) {
75 ctx->depth++;
76 return;
77 }
78
79 if( !strcmp(el, "author") ) {
80 /* Start of author info for current feed item.
81 * Set correct location. */
82 ctx->location = FEED_LOC_ATOM10_AUTHOR;
83 } else if( !strcmp(el, "link") ) {
84 /* Capture item URL, from the "url" XML attribute. */
85 if (ctx->curitem && ctx->location == FEED_LOC_ATOM10_ENTRY)
86 ctx->curitem->url = g_strdup(feed_parser_get_attribute_value(attr, "href"));
87 } else if( !strcmp(el, "source") ) {
88 ctx->location = FEED_LOC_ATOM10_SOURCE;
89 } else ctx->location = FEED_LOC_ATOM10_ENTRY;
90
91 if( !strcmp(el, "title") && ctx->curitem != NULL) {
92 a = feed_parser_get_attribute_value(attr, "type");
93 if( !a || !strcmp(a, "text") )
94 ctx->curitem->title_format = FEED_ITEM_TITLE_TEXT;
95 else if( !strcmp(a, "html") )
96 ctx->curitem->title_format = FEED_ITEM_TITLE_HTML;
97 else if( !strcmp(a, "xhtml") )
98 ctx->curitem->title_format = FEED_ITEM_TITLE_XHTML;
99 else
100 ctx->curitem->title_format = FEED_ITEM_TITLE_UNKNOWN;
101 } else if (!strcmp(el, "content") && ctx->curitem != NULL) {
102 ctx->location = FEED_LOC_ATOM10_CONTENT;
103 a = feed_parser_get_attribute_value(attr, "type");
104 if (a && !strcmp(a, "xhtml")) {
105 ctx->curitem->xhtml_content = TRUE;
106 ctx->xhtml_str = g_string_new(NULL);
107 }
108 }
109 } else if (ctx->depth >= 3) {
110 if (ctx->location == FEED_LOC_ATOM10_CONTENT
111 && ctx->curitem != NULL
112 && ctx->curitem->xhtml_content) {
113 guint i;
114 GString *txt = ctx->xhtml_str;
115 g_string_append_c(txt, '<');
116 g_string_append(txt, el);
117
118 for (i = 0; attr[i] != NULL && attr[i+1] != NULL; i += 2) {
119 g_string_append_printf(txt, " %s='%s'", attr[i], attr[i+1]);
120 }
121 g_string_append_c(txt, '>');
122 }
123 }
124
125
126 ctx->depth++;
127 }
128
feed_parser_atom10_end(void * data,const gchar * el)129 void feed_parser_atom10_end(void *data, const gchar *el)
130 {
131 FeedParserCtx *ctx = (FeedParserCtx *)data;
132 Feed *feed = ctx->feed;
133 gchar *text = NULL, *tmp;
134
135 if( ctx->str != NULL )
136 text = g_strstrip(g_strdup(ctx->str->str));
137 else
138 text = "";
139
140 switch( ctx->depth ) {
141
142 case 0:
143 /* Just in case. */
144 break;
145
146 case 1:
147
148 if( !strcmp(el, "feed") ) {
149 /* We have finished parsing the feed, reverse the list
150 * so it's not upside down. */
151 feed->items = g_slist_reverse(ctx->feed->items);
152 }
153
154 break;
155
156 case 2:
157
158 /* decide if we just received </entry>, so we can
159 * add a complete item to feed */
160 if( !strcmp(el, "entry") ) {
161
162 /* Fix up URL, if it is relative */
163 if (ctx->curitem->url != NULL &&
164 !strstr(ctx->curitem->url, "://") &&
165 ctx->feed->link != NULL) {
166 tmp = g_strconcat(ctx->feed->link,
167 (ctx->curitem->url[0] == '/' ? "" : "/"),
168 ctx->curitem->url, NULL);
169 feed_item_set_url(ctx->curitem, tmp);
170 g_free(tmp);
171 }
172
173 /* append the complete feed item */
174 if( ctx->curitem->id && ctx->curitem->title
175 && ctx->curitem->date_modified ) {
176 feed->items =
177 g_slist_prepend(feed->items, (gpointer)ctx->curitem);
178 }
179
180 /* since it's in the linked list, lose this pointer */
181 ctx->curitem = NULL;
182
183 } else if( !strcmp(el, "title") ) { /* so it wasn't end of item */
184 FILL(feed->title)
185 } else if( !strcmp(el, "summary" ) ) {
186 FILL(feed->description)
187 } else if( !strcmp(el, "updated" ) ) {
188 feed->date = procheader_date_parse(NULL, text, 0);
189 }
190 /* FIXME: add more later */
191
192 break;
193
194 case 3:
195
196 if( ctx->curitem == NULL )
197 break;
198
199 switch(ctx->location) {
200
201 /* We're in feed/entry */
202 case FEED_LOC_ATOM10_ENTRY:
203 if( !strcmp(el, "title") ) {
204 FILL(ctx->curitem->title)
205 } else if( !strcmp(el, "summary") ) {
206 FILL(ctx->curitem->summary)
207 } else if( !strcmp(el, "id") ) {
208 FILL(ctx->curitem->id)
209 feed_item_set_id_permalink(ctx->curitem, TRUE);
210 } else if( !strcmp(el, "published") ) {
211 ctx->curitem->date_published = procheader_date_parse(NULL, text, 0);
212 } else if( !strcmp(el, "updated") ) {
213 ctx->curitem->date_modified = procheader_date_parse(NULL, text, 0);
214 }
215
216 break;
217
218 /* We're in feed/author or about to leave feed/entry/author */
219 case FEED_LOC_ATOM10_AUTHOR:
220 if( !strcmp(el, "author" ) ) {
221 /* We just finished parsing <author> */
222 ctx->curitem->author = g_strdup_printf("%s%s%s%s%s",
223 ctx->name ? ctx->name : "",
224 ctx->name && ctx->mail ? " <" : ctx->mail ? "<" : "",
225 ctx->mail ? ctx->mail : "",
226 ctx->mail ? ">" : "",
227 !ctx->name && !ctx->mail ? "N/A" : "");
228 ctx->location = FEED_LOC_ATOM10_ENTRY;
229 } else if( !strcmp(el, "name") ) {
230 FILL(feed->author)
231 }
232
233 break;
234
235 case FEED_LOC_ATOM10_CONTENT:
236 if( !strcmp(el, "content") ) {
237 if (ctx->curitem->xhtml_content) {
238 /* Just in case the <content> tag itself also has some
239 * content of its own, not just the <div> it should,
240 * let's append it to the end. */
241 g_string_append(ctx->xhtml_str, text);
242 ctx->curitem->text = g_string_free(ctx->xhtml_str, FALSE);
243 ctx->xhtml_str = NULL;
244 } else {
245 FILL(ctx->curitem->text)
246 }
247 ctx->location = FEED_LOC_ATOM10_ENTRY;
248 }
249
250 break;
251 }
252 break;
253
254 case 4:
255
256 if( ctx->curitem == NULL )
257 break;
258
259 switch(ctx->location) {
260
261 /* We're in feed/entry/author */
262 case FEED_LOC_ATOM10_AUTHOR:
263 if( !strcmp(el, "name") ) {
264 FILL(ctx->name)
265 } else if( !strcmp(el, "email") ) {
266 FILL(ctx->mail)
267 }
268
269 break;
270
271 /* We're in feed/entry/source */
272 case FEED_LOC_ATOM10_SOURCE:
273 if( !strcmp(el, "title" ) ) {
274 FILL(ctx->curitem->sourcetitle)
275 } else if( !strcmp(el, "id" ) ) {
276 FILL(ctx->curitem->sourceid)
277 } else if( !strcmp(el, "updated" ) ) {
278 ctx->curitem->sourcedate = procheader_date_parse(NULL, text, 0);
279 }
280
281 break;
282
283 case FEED_LOC_ATOM10_CONTENT:
284 if (ctx->curitem->xhtml_content) {
285 g_string_append(ctx->xhtml_str, text);
286 g_string_append_printf(ctx->xhtml_str, "</%s>", el);
287 }
288 break;
289
290 }
291
292
293 break;
294
295 default:
296 if (ctx->location == FEED_LOC_ATOM10_CONTENT
297 && ctx->curitem->xhtml_content) {
298 g_string_append(ctx->xhtml_str, text);
299 g_string_append_printf(ctx->xhtml_str, "</%s>", el);
300 }
301 break;
302 }
303
304 if( ctx->str != NULL ) {
305 g_free(text);
306 g_string_free(ctx->str, TRUE);
307 ctx->str = NULL;
308 }
309 ctx->str = NULL;
310
311 ctx->depth--;
312 }
313