1 /* rss2ical.c: Turn RSS into ical
2  *
3  * Copyright (C) 2008, David Beckett http://www.dajobe.org/
4  *
5  * This package is Free Software and part of Redland http://librdf.org/
6  *
7  * It is licensed under the following three licenses as alternatives:
8  *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
9  *   2. GNU General Public License (GPL) V2 or any newer version
10  *   3. Apache License, V2.0 or any newer version
11  *
12  * You may not use this file except in compliance with at least one of
13  * the above three licenses.
14  *
15  * See LICENSE.html or LICENSE.txt at the top of this package for the
16  * complete terms and further detail along with the license texts for
17  * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
18  *
19  *
20  * USAGE: rss2ical [URI of RSS/Atom Feed] [Calendar Title] > result.ics
21  *
22  */
23 
24 #include <stdio.h>
25 #include <string.h>
26 #include <stdlib.h>
27 
28 #include <redland.h>
29 
30 
/* Leave RSS2ICAL_DEBUG undefined for normal builds; define it to compile in
 * the extra stderr tracing guarded by #ifdef RSS2ICAL_DEBUG. */
#undef RSS2ICAL_DEBUG

/* NOTE: ical_format() tests this macro with #ifdef, not #if, so merely
 * defining it - even with the value 0 - selects the "escape newline as \n"
 * branch.  Remove the definition entirely to get the other branch. */
#define DO_ESCAPE_NL 0

/* SPARQL query run against the parsed feed: one result row per rss:item
 * that has a dc:date, an rss:title and an rss:description, plus the
 * dc:creator when one is present. */
static const unsigned char* get_items_query=(const unsigned char*)
  "PREFIX rss: <http://purl.org/rss/1.0/>\n"
  "PREFIX dc: <http://purl.org/dc/elements/1.1/>\n"
  "PREFIX content: <http://web.resource.org/rss/1.0/modules/content/>\n"
  "SELECT ?item ?date ?title ?description ?creator\n"
  "WHERE {\n"
  " ?item a rss:item;\n"
  "       dc:date ?date;\n"
  "       rss:title ?title;\n"
  "       rss:description ?description .\n"
  " OPTIONAL { ?item dc:creator ?creator . }\n"
  "}";
47 
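/*
 * Not part of the query for now, as OPTIONAL in Rasqal is fragile/broken:
 *   OPTIONAL { ?item  dc:source ?source }
 *   OPTIONAL { ?item  content:encoded ?htmldesc }
 * main() still looks up the ?source and ?htmldesc bindings, so until these
 * clauses are restored those lookups find nothing.
 */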
53 
/* printf() template for the start of the calendar.  It consumes five %s
 * arguments, in this order:
 *   1. product identifier      (PRODID)
 *   2. calendar display name   (X-WR-CALNAME)
 *   3. calendar identifier     (X-WR-RELCALID)
 *   4. time zone name          (X-WR-TIMEZONE)
 *   5. time zone name again    (TZID of the VTIMEZONE block)
 * The VTIMEZONE block itself is a fixed BST/GMT definition.
 */
static const char *ical_header_format=
  "BEGIN:VCALENDAR\r\n"
  "VERSION:2.0\r\n"
  "PRODID:%s\r\n"
  "X-WR-CALNAME:%s\r\n"
  "X-WR-RELCALID:%s\r\n"
  "X-WR-TIMEZONE:%s\r\n"
  "CALSCALE:GREGORIAN\r\n"
  "METHOD:PUBLISH\r\n"
  "BEGIN:VTIMEZONE\r\n"
  "TZID:%s\r\n"
  "BEGIN:DAYLIGHT\r\n"
  "DTSTART:20060326T020000\r\n"
  "TZOFFSETTO:+0100\r\n"
  "TZOFFSETFROM:+0000\r\n"
  "TZNAME:BST\r\n"
  "END:DAYLIGHT\r\n"
  "BEGIN:STANDARD\r\n"
  "DTSTART:20061029T020000\r\n"
  "TZOFFSETTO:+0000\r\n"
  "TZOFFSETFROM:+0100\r\n"
  "TZNAME:GMT\r\n"
  "END:STANDARD\r\n"
  "END:VTIMEZONE\r\n";

/* Text that closes the calendar; written verbatim (no conversions). */
static const char *ical_footer_format=
  "END:VCALENDAR\r\n";

/* Time zone name substituted into the header template. */
static const char *tzone="Europe/London";

/* Basename of argv[0]; set at the start of main() and used as the prefix of
 * every diagnostic message. */
static char *program=NULL;
88 
89 
/*
 * ical_print - write one fixed iCalendar line.
 * @fh:   stream to write to
 * @line: NUL-terminated text, written as-is (no escaping, no folding)
 *
 * The text is followed by the CRLF line ending.
 */
static void
ical_print(FILE *fh, const char *line)
{
  fprintf(fh, "%s\r\n", line);
}
96 
97 
/* Length in octets of the UTF-8 sequence that starts at s[0].
 * Returns 1 for ASCII and for a stray continuation octet; a lead octet that
 * is followed by too few continuation octets yields only the octets that
 * are really there, so the terminating NUL is never stepped over. */
static size_t
ical_utf8_unit_length(const unsigned char *s)
{
  size_t expected;
  size_t n;

  if(s[0] < 0xC0)
    return 1;

  if(s[0] < 0xE0)
    expected=2;
  else if(s[0] < 0xF0)
    expected=3;
  else
    expected=4;

  for(n=1; n < expected && (s[n] & 0xC0) == 0x80; n++)
    ;

  return n;
}


/*
 * ical_format - write one iCalendar property as a folded content line.
 * @fh:      stream to write to
 * @key:     property name, e.g. "SUMMARY"
 * @attr:    property parameter text written after ';', or NULL for none
 * @escapes: extra characters to prefix with a backslash, or NULL.
 *           A backslash in the value is always escaped.
 * @value:   NUL-terminated property value; NULL is written as an empty value
 *
 * Writes KEY[;ATTR]:VALUE followed by CRLF.  The value is folded with
 * CRLF + space so that no physical line is longer than 75 octets, counting
 * the leading space of a continuation line.  A fold is never placed between
 * a backslash and the character it escapes, nor inside a UTF-8 sequence.
 * The key and parameter text themselves are not folded.
 *
 * Newlines in the value are written as the two characters \n when
 * DO_ESCAPE_NL is defined (the test is #ifdef, so the macro's value is
 * irrelevant) and as a single space otherwise.
 */
static void
ical_format(FILE *fh, const char *key, const char *attr,
            const char *escapes, const unsigned char *value)
{
  static const size_t max_line_octets=75;
  size_t col;
  size_t i;

  fputs(key, fh);
  col=strlen(key);

  if(attr) {
    fputc(';', fh);
    fputs(attr, fh);
    col += 1 + strlen(attr);
  }

  fputc(':', fh);
  col++;

  if(!value)
    value=(const unsigned char*)"";

  i=0;
  while(value[i]) {
    int c=value[i];
    size_t seq_len=ical_utf8_unit_length(&value[i]);
    int escape=(c == '\\' || (escapes && strchr(escapes, c) != NULL));
    size_t need;

    if(c == '\n') {
#ifdef DO_ESCAPE_NL
      escape=1;
      c='n';
#else
      c=' ';
#endif
    }

    /* Octets this unit adds to the line: optional backslash + sequence */
    need=seq_len + (escape ? 1 : 0);

    /* Fold BEFORE the unit if it would not fit; the continuation line
     * already holds one octet (the space) */
    if(col + need > max_line_octets) {
      fwrite("\r\n ", 1, 3, fh);
      col=1;
    }

    if(escape)
      fputc('\\', fh);
    if(seq_len > 1)
      fwrite(&value[i], 1, seq_len, fh);
    else
      fputc(c, fh);

    col += need;
    i += seq_len;
  }
  fwrite("\r\n", 1, 2, fh);
}
148 
149 
/*
 * iso2vcaldate - convert an ISO 8601 date/time to iCalendar basic form.
 * @iso_date: NUL-terminated text such as "YYYY-MM-DDTHH:MM:SSZ",
 *            "YYYY-MM-DDTHH:MMZ" or "YYYY-MM-DD"; NULL is treated as empty
 *
 * Returns a newly allocated "YYYYMMDDTHHMMSSZ" string that the caller must
 * free(), or NULL if memory could not be allocated.  Fields missing from the
 * input keep the '0' digits of the blank template.
 *
 * Copying stops at 'Z', at a fractional-seconds '.', or at a numeric UTC
 * offset ('+', or '-' once the eight date digits have been copied), and
 * never goes past the 15 characters that precede the final 'Z'.
 * NOTE: a numeric UTC offset is dropped, not applied - the wall-clock fields
 * are emitted unchanged with a 'Z' suffix.
 */
static unsigned char*
iso2vcaldate(const unsigned char* iso_date)
{
  static const char blank[]="00000000T000000Z";
  const size_t date_digits=8;                 /* YYYYMMDD */
  const size_t max_fields=sizeof(blank) - 2;  /* all characters before 'Z' */
  unsigned char* vcaldate;
  size_t i, j;

  vcaldate=malloc(sizeof(blank));
  if(!vcaldate)
    return NULL;
  memcpy(vcaldate, blank, sizeof(blank));

  if(!iso_date)
    return vcaldate;

  for(i=0, j=0; iso_date[i] && j < max_fields; i++) {
    unsigned char c=iso_date[i];

    if(c == 'Z' || c == '.' || c == '+')
      break;
    if(c == '-') {
      /* After the date this is a negative UTC offset, not a separator */
      if(j >= date_digits)
        break;
      continue;
    }
    if(c == ':')
      continue;

    vcaldate[j++]=c;
  }

  return vcaldate;
}
172 
173 
174 
/* Encode code point cp as UTF-8 into out, which must have room for 4 octets.
 * Returns the number of octets written, or 0 (nothing written) when cp is
 * NUL, a UTF-16 surrogate, or above U+10FFFF.  Kept local so the result can
 * be range-checked before anything is written to the output buffer. */
static size_t
rss2ical_utf8_encode(unsigned long cp, unsigned char *out)
{
  if(cp == 0 || cp > 0x10FFFFUL || (cp >= 0xD800UL && cp <= 0xDFFFUL))
    return 0;

  if(cp < 0x80UL) {
    out[0]=(unsigned char)cp;
    return 1;
  }
  if(cp < 0x800UL) {
    out[0]=(unsigned char)(0xC0 | (cp >> 6));
    out[1]=(unsigned char)(0x80 | (cp & 0x3F));
    return 2;
  }
  if(cp < 0x10000UL) {
    out[0]=(unsigned char)(0xE0 | (cp >> 12));
    out[1]=(unsigned char)(0x80 | ((cp >> 6) & 0x3F));
    out[2]=(unsigned char)(0x80 | (cp & 0x3F));
    return 3;
  }
  out[0]=(unsigned char)(0xF0 | (cp >> 18));
  out[1]=(unsigned char)(0x80 | ((cp >> 12) & 0x3F));
  out[2]=(unsigned char)(0x80 | ((cp >> 6) & 0x3F));
  out[3]=(unsigned char)(0x80 | (cp & 0x3F));
  return 4;
}


/* Parse a numeric character reference ("&#NNN;" or "&#xHHH;") that starts
 * at s[0] == '&', looking at no more than avail octets.  On success stores
 * the code point in *cp and returns the length of the whole reference,
 * including '&' and ';'.  Returns 0 if the text is not a well-formed
 * numeric reference. */
static size_t
rss2ical_parse_numeric_ref(const unsigned char *s, size_t avail,
                           unsigned long *cp)
{
  unsigned long value=0;
  unsigned int base=10;
  size_t digits=0;
  size_t k=2; /* first octet after "&#" */

  if(avail < 4 || s[1] != '#')
    return 0;

  if(s[k] == 'x' || s[k] == 'X') {
    base=16;
    k++;
  }

  for(; k < avail; k++) {
    unsigned char c=s[k];
    unsigned int digit;

    if(c >= '0' && c <= '9')
      digit=(unsigned int)(c - '0');
    else if(base == 16 && c >= 'a' && c <= 'f')
      digit=(unsigned int)(c - 'a') + 10;
    else if(base == 16 && c >= 'A' && c <= 'F')
      digit=(unsigned int)(c - 'A') + 10;
    else
      break;

    /* Once the value is out of range it stays out of range; not growing
     * it any further rules out arithmetic overflow */
    if(value <= 0x10FFFFUL)
      value=value * base + digit;
    digits++;
  }

  if(!digits || k >= avail || s[k] != ';')
    return 0;

  *cp=value;
  return k + 1;
}


/*
 * remove_html_entities - decode character references in feed text.
 * @html_desc: NUL-terminated input text (not modified); NULL gives ""
 * @len:       length of @html_desc in octets; reading stops at this length
 *             or at the first NUL, whichever comes first
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(), or NULL if memory could not be allocated.  The result is never
 * longer than @len octets.
 *
 *  - leading spaces and newlines are dropped
 *  - every other newline becomes a space
 *  - &amp; &lt; &gt; &quot; &apos; become the character they name
 *  - &#NNN; and &#xHHH; become the UTF-8 encoding of that code point
 *  - anything else starting with '&' (a bare ampersand, an unknown entity,
 *    a malformed or out-of-range numeric reference) is copied unchanged
 */
static unsigned char*
remove_html_entities(unsigned char* html_desc, size_t len)
{
  static const struct {
    const char *text;
    size_t text_len;
    unsigned char ch;
  } named[]={
    { "&amp;",  5, '&'  },
    { "&lt;",   4, '<'  },
    { "&gt;",   4, '>'  },
    { "&quot;", 6, '"'  },
    { "&apos;", 6, '\'' }
  };
  unsigned char* description;
  size_t i=0;
  size_t j=0;

  description=malloc(len+1);
  if(!description)
    return NULL;

  if(!html_desc) {
    description[0]='\0';
    return description;
  }

  /* Trim leading white space */
  while(i < len && (html_desc[i] == ' ' || html_desc[i] == '\n'))
    i++;

  while(i < len && html_desc[i]) {
    unsigned char c=html_desc[i];

    if(c == '&') {
      size_t avail=len - i;
      unsigned long cp=0;
      size_t ref_len;
      size_t n;

      ref_len=rss2ical_parse_numeric_ref(&html_desc[i], avail, &cp);
      if(ref_len) {
        /* The encoding (at most 4 octets) is always shorter than the
         * reference it replaces, so the write stays inside the buffer */
        size_t ulen=rss2ical_utf8_encode(cp, &description[j]);
#ifdef RSS2ICAL_DEBUG
        fprintf(stderr, "%s: Encoding char %lu gave UTF-8 len %lu\n",
                program, cp, (unsigned long)ulen);
#endif
        if(ulen) {
          j += ulen;
          i += ref_len;
          continue;
        }
      }

      for(n=0; n < sizeof(named)/sizeof(named[0]); n++) {
        if(avail >= named[n].text_len &&
           !memcmp(&html_desc[i], named[n].text, named[n].text_len))
          break;
      }
      if(n < sizeof(named)/sizeof(named[0])) {
        description[j++]=named[n].ch;
        i += named[n].text_len;
        continue;
      }

      /* Not a reference we decode: fall through, keeping the '&' */
    } else if(c == '\n')
      c=' ';

    description[j++]=c;
    i++;
  }
  description[j]='\0';
  return description;
}
247 
248 
249 static char*
uri_to_calid(librdf_uri * uri)250 uri_to_calid(librdf_uri* uri)
251 {
252   size_t len;
253   unsigned char* uri_string;
254   char* calid;
255   unsigned char c;
256   int i, j;
257 
258   uri_string=librdf_uri_as_counted_string(uri, &len);
259 
260   calid=(char*)malloc(len+1);
261   for(i=0, j=0; (c=uri_string[i]); i++) {
262     if(c <= 0x20 || c >= 0x7f)
263       continue;
264 
265     if(c == '\\' || c == ';' || c == ':' || c == '\"' || c == ',' ||
266        c == '/')
267       calid[j++]='-';
268     else
269       calid[j++]=c;
270   }
271 
272   return calid;
273 }
274 
275 
276 int
main(int argc,char * argv[])277 main(int argc, char *argv[])
278 {
279   librdf_world* world;
280   librdf_storage* storage;
281   librdf_model* model;
282   librdf_parser* parser;
283   librdf_query* query;
284   librdf_query_results* results;
285   librdf_uri *uri;
286   char *p;
287   char* calendar_name;
288   char* calendar_id;
289 
290   program=argv[0];
291   if((p=strrchr(program, '/')))
292     program=p+1;
293   else if((p=strrchr(program, '\\')))
294     program=p+1;
295   argv[0]=program;
296 
297   if(argc != 3) {
298     fprintf(stderr, "USAGE: %s RSS-URI CALENDAR-NAME\n", program);
299     return 1;
300   }
301 
302   world=librdf_new_world();
303   librdf_world_open(world);
304 
305   storage=librdf_new_storage(world, "memory", NULL, NULL);
306   model=librdf_new_model(world, storage, NULL);
307 
308   if(!model || !storage) {
309     fprintf(stderr, "%s: Failed to make model or storage\n", program);
310     return 1;
311   }
312 
313   uri=librdf_new_uri(world, (unsigned char*)argv[1]);
314 
315   calendar_name=argv[2];
316 
317   fprintf(stderr, "%s: Reading RSS from %s\n", program,
318           librdf_uri_as_string(uri));
319 
320   parser=librdf_new_parser(world, "rss-tag-soup", NULL, NULL);
321   librdf_parser_parse_into_model(parser, uri, NULL, model);
322   librdf_free_parser(parser);
323 
324   fprintf(stderr, "%s: Querying model for RSS items\n", program);
325 
326   query=librdf_new_query(world, "sparql", NULL, get_items_query, uri);
327 
328   results=librdf_model_query_execute(model, query);
329   if(!results) {
330     fprintf(stderr, "%s: Query of model with SPARQL query '%s' failed\n",
331             program, get_items_query);
332     return 1;
333   }
334 
335   fprintf(stderr, "%s: Processing results\n", program);
336 
337   calendar_id=uri_to_calid(uri);
338 
339   fprintf(stdout, ical_header_format,
340           "-//librdf/rss2ical Version 1.0//EN",
341           calendar_name,
342           calendar_id,
343           tzone,
344           tzone);
345 
346   free(calendar_id);
347 
348   while(!librdf_query_results_finished(results)) {
349     unsigned char *uid=NULL;
350     unsigned char *summary=NULL;
351     unsigned char *dtstart=NULL;
352     unsigned char *location=NULL;
353     unsigned char *html_desc=NULL;
354     size_t html_desc_len;
355     unsigned char *description=NULL;
356     unsigned char *url=NULL;
357     librdf_node* node;
358     char *creator=NULL;
359 
360     node=librdf_query_results_get_binding_value_by_name(results, "item");
361     if(!librdf_node_is_resource(node))
362       goto nextresult;
363 
364     url=librdf_uri_as_string(librdf_node_get_uri(node));
365 
366     /* uid is a new string */
367     uid=(unsigned char*)uri_to_calid(librdf_node_get_uri(node));
368 
369     node=librdf_query_results_get_binding_value_by_name(results, "date");
370     if(!librdf_node_is_literal(node)) {
371       fprintf(stderr, "%s: Date in item %s is not a literal\n", program, url);
372       goto nextresult;
373     }
374     dtstart=librdf_node_get_literal_value(node);
375     dtstart=iso2vcaldate(dtstart);
376 
377     node=librdf_query_results_get_binding_value_by_name(results, "title");
378     if(!librdf_node_is_literal(node))
379       summary=(unsigned char*)"(No Title)";
380     else
381       summary=librdf_node_get_literal_value(node);
382 
383     node=librdf_query_results_get_binding_value_by_name(results, "htmldesc");
384     if(node && librdf_node_is_literal(node))
385         html_desc=librdf_node_get_literal_value_as_counted_string(node,
386                                                                   &html_desc_len);
387 
388     if(!description) {
389       node=librdf_query_results_get_binding_value_by_name(results, "description");
390       if(node && librdf_node_is_literal(node))
391         html_desc=librdf_node_get_literal_value_as_counted_string(node,
392                                                                   &html_desc_len);
393     }
394     if(html_desc) {
395       description=remove_html_entities(html_desc, html_desc_len);
396     }
397 
398     node=librdf_query_results_get_binding_value_by_name(results, "source");
399     if(node && librdf_node_is_literal(node))
400       location=librdf_node_get_literal_value(node);
401 
402     node=librdf_query_results_get_binding_value_by_name(results, "creator");
403     if(node && librdf_node_is_literal(node)) {
404       unsigned char *value=librdf_node_get_literal_value(node);
405       creator=malloc(strlen((const char*)value)+6);
406       sprintf(creator, "CN=\"%s\"", value);
407     }
408 
409 
410     ical_print(stdout, "BEGIN:VEVENT");
411     ical_format(stdout, "UID", NULL, NULL, uid);
412     ical_format(stdout, "SUMMARY", NULL, NULL, summary);
413     if(location)
414       ical_format(stdout, "LOCATION", NULL, NULL, location);
415     if(creator) {
416       ical_format(stdout, "ATTENDEE", creator, NULL,
417                   (const unsigned char*)"invalid:nomail");
418       free(creator);
419     }
420     ical_format(stdout, "DTSTART", NULL, NULL, dtstart);
421     ical_format(stdout, "DTSTAMP", NULL, NULL, dtstart);
422     ical_format(stdout, "LAST-MODIFIED", NULL, NULL, dtstart);
423     ical_format(stdout, "DESCRIPTION", NULL, ";,\"", description);
424     if(url)
425       ical_format(stdout, "URL", "VALUE=URI", ";", url);
426     ical_format(stdout, "CLASS", NULL, NULL, (const unsigned char*)"PUBLIC");
427 
428     ical_print(stdout, "END:VEVENT");
429 
430     free(description);
431     free(uid);
432 
433 nextresult:
434     librdf_query_results_next(results);
435   }
436 
437   fputs(ical_footer_format, stdout);
438 
439   librdf_free_query_results(results);
440   librdf_free_query(query);
441 
442   librdf_free_uri(uri);
443 
444   librdf_free_model(model);
445   librdf_free_storage(storage);
446 
447   librdf_free_world(world);
448 
449   /* keep gcc -Wall happy */
450   return(0);
451 }
452