1 /* rss2ical.c: Turn RSS into ical
2 *
3 * Copyright (C) 2008, David Beckett http://www.dajobe.org/
4 *
5 * This package is Free Software and part of Redland http://librdf.org/
6 *
7 * It is licensed under the following three licenses as alternatives:
8 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
9 * 2. GNU General Public License (GPL) V2 or any newer version
10 * 3. Apache License, V2.0 or any newer version
11 *
12 * You may not use this file except in compliance with at least one of
13 * the above three licenses.
14 *
15 * See LICENSE.html or LICENSE.txt at the top of this package for the
16 * complete terms and further detail along with the license texts for
17 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
18 *
19 *
20 * USAGE: rss2ical [URI of RSS/Atom Feed] [Calendar Title] > result.ics
21 *
22 */
23
24 #include <stdio.h>
25 #include <string.h>
26 #include <stdlib.h>
27
28 #include <redland.h>
29
30
31 #undef RSS2ICAL_DEBUG
32
33 #define DO_ESCAPE_NL 0
34
/* SPARQL query run against the parsed feed: one result row per rss:item
 * with its URI, date, title and description, plus the creator when the
 * item has one.
 */
static const unsigned char* get_items_query=(const unsigned char*)
  "PREFIX rss: <http://purl.org/rss/1.0/>\n"
  "PREFIX dc: <http://purl.org/dc/elements/1.1/>\n"
  "PREFIX content: <http://web.resource.org/rss/1.0/modules/content/>\n"
  "SELECT ?item ?date ?title ?description ?creator\n"
  "WHERE {\n"
  "?item a rss:item;\n"
  "dc:date ?date;\n"
  "rss:title ?title;\n"
  "rss:description ?description .\n"
  "OPTIONAL { ?item dc:creator ?creator . }\n"
  "}";

/*
 * Removed for now as OPTIONAL in Rasqal is fragile/broken:
 *   OPTIONAL { ?item dc:source ?source }
 *   OPTIONAL { ?item content:encoded ?htmldesc }
 */
53
/* printf() format for the calendar preamble.  The five %s arguments are,
 * in order: product id, calendar name, related calendar id, time zone
 * name (X-WR-TIMEZONE) and time zone name again (TZID).
 */
static const char *ical_header_format=
  "BEGIN:VCALENDAR\r\n"
  "VERSION:2.0\r\n"
  "PRODID:%s\r\n"
  "X-WR-CALNAME:%s\r\n"
  "X-WR-RELCALID:%s\r\n"
  "X-WR-TIMEZONE:%s\r\n"
  "CALSCALE:GREGORIAN\r\n"
  "METHOD:PUBLISH\r\n"
  "BEGIN:VTIMEZONE\r\n"
  "TZID:%s\r\n"
  "BEGIN:DAYLIGHT\r\n"
  "DTSTART:20060326T020000\r\n"
  "TZOFFSETTO:+0100\r\n"
  "TZOFFSETFROM:+0000\r\n"
  "TZNAME:BST\r\n"
  "END:DAYLIGHT\r\n"
  "BEGIN:STANDARD\r\n"
  "DTSTART:20061029T020000\r\n"
  "TZOFFSETTO:+0000\r\n"
  "TZOFFSETFROM:+0100\r\n"
  "TZNAME:GMT\r\n"
  "END:STANDARD\r\n"
  "END:VTIMEZONE\r\n";
80
/* Text written after the last event to close the calendar object. */
static const char *ical_footer_format="END:VCALENDAR\r\n";
84
/* Time zone name written into the calendar header. */
static const char *tzone = "Europe/London";

/* Program name used as the prefix of diagnostic messages; set by main(). */
static char *program = NULL;
88
89
/* Write one fixed iCalendar content line to fh, followed by CRLF.
 * The text is written as given: no escaping and no folding.
 */
static void
ical_print(FILE *fh, const char *line)
{
  static const char crlf[]="\r\n";

  fputs(line, fh);
  fwrite(crlf, sizeof(char), sizeof(crlf) - 1, fh);
}
96
97
/* Write one iCalendar content line "KEY[;ATTR]:VALUE" to fh, ending in CRLF.
 *
 * fh      - output stream
 * key     - property name, written verbatim
 * attr    - optional property parameter text written after ';' (may be NULL)
 * escapes - optional set of characters that get a '\' prefix in the value
 *           (may be NULL); '\' itself is always escaped
 * value   - NUL-terminated property value; NULL is treated as empty
 *
 * The value is folded with CRLF + single space so that no output line is
 * longer than 75 octets, counting the leading space of continuation lines.
 * A fold is never placed between a '\' and the character it escapes, nor
 * inside a UTF-8 multi-octet sequence.  The "KEY;ATTR:" prefix itself is
 * never folded; if it already fills the line, the value starts on a
 * continuation line.
 */
static void
ical_format(FILE *fh, const char *key, const char *attr,
            const char *escapes, const unsigned char *value)
{
  const size_t max_octets=75;
  size_t col;
  size_t i=0;

  fputs(key, fh);
  col=strlen(key);

  if(attr) {
    fputc(';', fh);
    fputs(attr, fh);
    col += 1 + strlen(attr);
  }

  fputc(':', fh);
  col++;

  if(!value)
    value=(const unsigned char*)"";

  while(value[i]) {
    unsigned char c=value[i];
    unsigned char unit[4];   /* octets that must stay on the same line */
    size_t unit_len=0;
    size_t consumed=1;       /* input octets used to build the unit */

    if(c >= 0xC0) {
      /* UTF-8 lead octet: keep it together with its continuation octets.
       * The terminating NUL is not a continuation octet, so this cannot
       * run past the end of the string.
       */
      unit[unit_len++]=c;
      while(consumed < sizeof(unit) &&
            (value[i + consumed] & 0xC0) == 0x80) {
        unit[unit_len++]=value[i + consumed];
        consumed++;
      }
    } else {
      if(c == '\\' ||
         (escapes && (strchr(escapes, c) != NULL)))
        unit[unit_len++]='\\';

      if(c == '\n') {
        /* NOTE: this is #ifdef, not #if, so newline escaping is active
         * whenever DO_ESCAPE_NL is defined at all - even when it is
         * defined as 0.
         */
#ifdef DO_ESCAPE_NL
        unit[unit_len++]='\\';
        c='n';
#else
        c=' ';
#endif
      }
      unit[unit_len++]=c;
    }

    if(col + unit_len > max_octets) {
      fwrite("\r\n ", 1, 3, fh);
      col=1;                 /* the leading space counts towards the limit */
    }

    fwrite(unit, 1, unit_len, fh);
    col += unit_len;
    i += consumed;
  }

  fwrite("\r\n", 1, 2, fh);
}
148
149
/* Convert an ISO 8601 / W3CDTF date-time into iCalendar UTC form.
 *
 *   YYYY-MM-DDTHH:MM:SSZ  }
 *   YYYY-MM-DDTHH:MMZ     }  to YYYYMMDDTHHMMSSZ
 *   YYYY-MM-DD            }
 *
 * Fields missing from the input are left as zeros.  Conversion stops at
 * 'Z', at fractional seconds ('.') or at a numeric zone offset ('+', or
 * '-' after the 'T'); the offset is NOT applied, the result is always
 * labelled 'Z'.  At most 15 characters are copied, so the trailing 'Z'
 * and the terminator of the result are never overwritten whatever the
 * input length.
 *
 * Returns a newly allocated 16-character string that the caller must
 * free(), or NULL if memory could not be allocated.  A NULL iso_date
 * yields the all-zero date.
 */
static unsigned char*
iso2vcaldate(const unsigned char* iso_date)
{
  static const char zero_date[]="00000000T000000Z";
  const size_t writable=sizeof(zero_date) - 2;  /* all but 'Z' and NUL */
  unsigned char *vcaldate;
  size_t i;
  size_t j=0;
  int in_time=0;

  vcaldate=malloc(sizeof(zero_date));
  if(!vcaldate)
    return NULL;
  memcpy(vcaldate, zero_date, sizeof(zero_date));

  if(!iso_date)
    return vcaldate;

  for(i=0; iso_date[i] && j < writable; i++) {
    unsigned char c=iso_date[i];

    if(c == 'Z' || c == '+' || c == '.' || (in_time && c == '-'))
      break;
    if(c == ':' || c == '-')
      continue;
    if(c == 'T')
      in_time=1;
    vcaldate[j++]=c;
  }

  return vcaldate;
}
172
173
174
/* Encode code point cp as UTF-8 into out, which must have room for 4
 * octets.  Returns the number of octets written, or 0 if cp is NUL, a
 * surrogate or beyond U+10FFFF.
 */
static size_t
html_utf8_encode(unsigned long cp, unsigned char *out)
{
  if(cp == 0)
    return 0;
  if(cp < 0x80UL) {
    out[0]=(unsigned char)cp;
    return 1;
  }
  if(cp < 0x800UL) {
    out[0]=(unsigned char)(0xC0 | (cp >> 6));
    out[1]=(unsigned char)(0x80 | (cp & 0x3F));
    return 2;
  }
  if(cp >= 0xD800UL && cp <= 0xDFFFUL)
    return 0;
  if(cp < 0x10000UL) {
    out[0]=(unsigned char)(0xE0 | (cp >> 12));
    out[1]=(unsigned char)(0x80 | ((cp >> 6) & 0x3F));
    out[2]=(unsigned char)(0x80 | (cp & 0x3F));
    return 3;
  }
  if(cp <= 0x10FFFFUL) {
    out[0]=(unsigned char)(0xF0 | (cp >> 18));
    out[1]=(unsigned char)(0x80 | ((cp >> 12) & 0x3F));
    out[2]=(unsigned char)(0x80 | ((cp >> 6) & 0x3F));
    out[3]=(unsigned char)(0x80 | (cp & 0x3F));
    return 4;
  }
  return 0;
}


/* Try to decode the character reference that starts at src[0] == '&'.
 * Only the first avail octets of src are examined.  Recognised forms are
 * &#NNN; &#xHHH; &amp; &lt; &gt; &quot; and &apos;.
 *
 * On success the UTF-8 result is stored in out (room for 4 octets), its
 * length in *out_len, and the number of input octets consumed (including
 * the '&' and the ';') is returned.  Returns 0 if the text is not a
 * recognised, well-formed reference.
 */
static size_t
html_decode_entity(const unsigned char *src, size_t avail,
                   unsigned char *out, size_t *out_len)
{
  static const struct {
    const char *name;
    size_t name_len;
    unsigned char ch;
  } named[]={
    { "amp;",  4, '&'  },
    { "lt;",   3, '<'  },
    { "gt;",   3, '>'  },
    { "quot;", 5, '"'  },
    { "apos;", 5, '\'' }
  };
  size_t k;

  if(avail < 2)
    return 0;

  if(src[1] == '#') {
    unsigned long cp=0;
    unsigned int base=10;
    size_t pos=2;
    size_t digits=0;

    if(pos < avail && (src[pos] == 'x' || src[pos] == 'X')) {
      base=16;
      pos++;
    }

    for(; pos < avail; pos++) {
      unsigned char c=src[pos];
      unsigned int v;

      if(c >= '0' && c <= '9')
        v=(unsigned int)(c - '0');
      else if(base == 16 && c >= 'a' && c <= 'f')
        v=(unsigned int)(c - 'a') + 10;
      else if(base == 16 && c >= 'A' && c <= 'F')
        v=(unsigned int)(c - 'A') + 10;
      else
        break;

      cp=cp * base + v;
      /* Reject as soon as the value leaves the Unicode range; this also
       * keeps cp from ever overflowing. */
      if(cp > 0x10FFFFUL)
        return 0;
      digits++;
    }

    if(!digits || pos >= avail || src[pos] != ';')
      return 0;

    *out_len=html_utf8_encode(cp, out);
    return *out_len ? pos + 1 : 0;
  }

  for(k=0; k < sizeof(named) / sizeof(named[0]); k++) {
    if(avail - 1 >= named[k].name_len &&
       !memcmp(src + 1, named[k].name, named[k].name_len)) {
      out[0]=named[k].ch;
      *out_len=1;
      return named[k].name_len + 1;
    }
  }

  return 0;
}


/* Make a plain-text copy of an HTML-escaped description.
 *
 * html_desc - NUL-terminated input text
 * len       - length of html_desc in octets; at most this many octets
 *             are read
 *
 * Leading spaces and newlines are dropped, every other newline becomes a
 * space, and character references (&#NNN; &#xHHH; &amp; &lt; &gt; &quot;
 * &apos;) are replaced by the character they name, encoded as UTF-8.
 * Anything after '&' that is not a recognised, well-formed reference is
 * copied through unchanged rather than treated as an error.
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(), or NULL if html_desc is NULL or memory could not be allocated.
 */
static unsigned char*
remove_html_entities(unsigned char* html_desc, size_t len)
{
  unsigned char *description;
  size_t in_len;
  size_t i=0;
  size_t j=0;

  if(!html_desc)
    return NULL;

  in_len=strlen((const char*)html_desc);
  if(len < in_len)
    in_len=len;

  /* A decoded reference is never longer than its source text, so the
   * output always fits in in_len octets plus the terminator. */
  description=malloc(in_len + 1);
  if(!description)
    return NULL;

  /* Trim leading white space */
  while(i < in_len && (html_desc[i] == ' ' || html_desc[i] == '\n'))
    i++;

  while(i < in_len) {
    unsigned char c=html_desc[i];

    if(c == '&') {
      unsigned char utf8[4];
      size_t utf8_len=0;
      size_t used=html_decode_entity(html_desc + i, in_len - i,
                                     utf8, &utf8_len);
      if(used) {
        memcpy(description + j, utf8, utf8_len);
        j += utf8_len;
        i += used;
        continue;
      }
      /* Not a reference we understand: keep the '&' literally. */
    } else if(c == '\n') {
      c=' ';
    }

    description[j++]=c;
    i++;
  }

  description[j]='\0';
  return description;
}
247
248
249 static char*
uri_to_calid(librdf_uri * uri)250 uri_to_calid(librdf_uri* uri)
251 {
252 size_t len;
253 unsigned char* uri_string;
254 char* calid;
255 unsigned char c;
256 int i, j;
257
258 uri_string=librdf_uri_as_counted_string(uri, &len);
259
260 calid=(char*)malloc(len+1);
261 for(i=0, j=0; (c=uri_string[i]); i++) {
262 if(c <= 0x20 || c >= 0x7f)
263 continue;
264
265 if(c == '\\' || c == ';' || c == ':' || c == '\"' || c == ',' ||
266 c == '/')
267 calid[j++]='-';
268 else
269 calid[j++]=c;
270 }
271
272 return calid;
273 }
274
275
276 int
main(int argc,char * argv[])277 main(int argc, char *argv[])
278 {
279 librdf_world* world;
280 librdf_storage* storage;
281 librdf_model* model;
282 librdf_parser* parser;
283 librdf_query* query;
284 librdf_query_results* results;
285 librdf_uri *uri;
286 char *p;
287 char* calendar_name;
288 char* calendar_id;
289
290 program=argv[0];
291 if((p=strrchr(program, '/')))
292 program=p+1;
293 else if((p=strrchr(program, '\\')))
294 program=p+1;
295 argv[0]=program;
296
297 if(argc != 3) {
298 fprintf(stderr, "USAGE: %s RSS-URI CALENDAR-NAME\n", program);
299 return 1;
300 }
301
302 world=librdf_new_world();
303 librdf_world_open(world);
304
305 storage=librdf_new_storage(world, "memory", NULL, NULL);
306 model=librdf_new_model(world, storage, NULL);
307
308 if(!model || !storage) {
309 fprintf(stderr, "%s: Failed to make model or storage\n", program);
310 return 1;
311 }
312
313 uri=librdf_new_uri(world, (unsigned char*)argv[1]);
314
315 calendar_name=argv[2];
316
317 fprintf(stderr, "%s: Reading RSS from %s\n", program,
318 librdf_uri_as_string(uri));
319
320 parser=librdf_new_parser(world, "rss-tag-soup", NULL, NULL);
321 librdf_parser_parse_into_model(parser, uri, NULL, model);
322 librdf_free_parser(parser);
323
324 fprintf(stderr, "%s: Querying model for RSS items\n", program);
325
326 query=librdf_new_query(world, "sparql", NULL, get_items_query, uri);
327
328 results=librdf_model_query_execute(model, query);
329 if(!results) {
330 fprintf(stderr, "%s: Query of model with SPARQL query '%s' failed\n",
331 program, get_items_query);
332 return 1;
333 }
334
335 fprintf(stderr, "%s: Processing results\n", program);
336
337 calendar_id=uri_to_calid(uri);
338
339 fprintf(stdout, ical_header_format,
340 "-//librdf/rss2ical Version 1.0//EN",
341 calendar_name,
342 calendar_id,
343 tzone,
344 tzone);
345
346 free(calendar_id);
347
348 while(!librdf_query_results_finished(results)) {
349 unsigned char *uid=NULL;
350 unsigned char *summary=NULL;
351 unsigned char *dtstart=NULL;
352 unsigned char *location=NULL;
353 unsigned char *html_desc=NULL;
354 size_t html_desc_len;
355 unsigned char *description=NULL;
356 unsigned char *url=NULL;
357 librdf_node* node;
358 char *creator=NULL;
359
360 node=librdf_query_results_get_binding_value_by_name(results, "item");
361 if(!librdf_node_is_resource(node))
362 goto nextresult;
363
364 url=librdf_uri_as_string(librdf_node_get_uri(node));
365
366 /* uid is a new string */
367 uid=(unsigned char*)uri_to_calid(librdf_node_get_uri(node));
368
369 node=librdf_query_results_get_binding_value_by_name(results, "date");
370 if(!librdf_node_is_literal(node)) {
371 fprintf(stderr, "%s: Date in item %s is not a literal\n", program, url);
372 goto nextresult;
373 }
374 dtstart=librdf_node_get_literal_value(node);
375 dtstart=iso2vcaldate(dtstart);
376
377 node=librdf_query_results_get_binding_value_by_name(results, "title");
378 if(!librdf_node_is_literal(node))
379 summary=(unsigned char*)"(No Title)";
380 else
381 summary=librdf_node_get_literal_value(node);
382
383 node=librdf_query_results_get_binding_value_by_name(results, "htmldesc");
384 if(node && librdf_node_is_literal(node))
385 html_desc=librdf_node_get_literal_value_as_counted_string(node,
386 &html_desc_len);
387
388 if(!description) {
389 node=librdf_query_results_get_binding_value_by_name(results, "description");
390 if(node && librdf_node_is_literal(node))
391 html_desc=librdf_node_get_literal_value_as_counted_string(node,
392 &html_desc_len);
393 }
394 if(html_desc) {
395 description=remove_html_entities(html_desc, html_desc_len);
396 }
397
398 node=librdf_query_results_get_binding_value_by_name(results, "source");
399 if(node && librdf_node_is_literal(node))
400 location=librdf_node_get_literal_value(node);
401
402 node=librdf_query_results_get_binding_value_by_name(results, "creator");
403 if(node && librdf_node_is_literal(node)) {
404 unsigned char *value=librdf_node_get_literal_value(node);
405 creator=malloc(strlen((const char*)value)+6);
406 sprintf(creator, "CN=\"%s\"", value);
407 }
408
409
410 ical_print(stdout, "BEGIN:VEVENT");
411 ical_format(stdout, "UID", NULL, NULL, uid);
412 ical_format(stdout, "SUMMARY", NULL, NULL, summary);
413 if(location)
414 ical_format(stdout, "LOCATION", NULL, NULL, location);
415 if(creator) {
416 ical_format(stdout, "ATTENDEE", creator, NULL,
417 (const unsigned char*)"invalid:nomail");
418 free(creator);
419 }
420 ical_format(stdout, "DTSTART", NULL, NULL, dtstart);
421 ical_format(stdout, "DTSTAMP", NULL, NULL, dtstart);
422 ical_format(stdout, "LAST-MODIFIED", NULL, NULL, dtstart);
423 ical_format(stdout, "DESCRIPTION", NULL, ";,\"", description);
424 if(url)
425 ical_format(stdout, "URL", "VALUE=URI", ";", url);
426 ical_format(stdout, "CLASS", NULL, NULL, (const unsigned char*)"PUBLIC");
427
428 ical_print(stdout, "END:VEVENT");
429
430 free(description);
431 free(uid);
432
433 nextresult:
434 librdf_query_results_next(results);
435 }
436
437 fputs(ical_footer_format, stdout);
438
439 librdf_free_query_results(results);
440 librdf_free_query(query);
441
442 librdf_free_uri(uri);
443
444 librdf_free_model(model);
445 librdf_free_storage(storage);
446
447 librdf_free_world(world);
448
449 /* keep gcc -Wall happy */
450 return(0);
451 }
452