1 /* -*- Mode: c; c-basic-offset: 2 -*-
2 *
3 * raptor_serialize_rss.c - Raptor RSS 1.0 and Atom 1.0 serializers
4 *
5 * Copyright (C) 2003-2009, David Beckett http://www.dajobe.org/
6 * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/
7 *
8 * This package is Free Software and part of Redland http://librdf.org/
9 *
10 * It is licensed under the following three licenses as alternatives:
11 * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
12 * 2. GNU General Public License (GPL) V2 or any newer version
13 * 3. Apache License, V2.0 or any newer version
14 *
15 * You may not use this file except in compliance with at least one of
16 * the above three licenses.
17 *
18 * See LICENSE.html or LICENSE.txt at the top of this package for the
19 * complete terms and further detail along with the license texts for
20 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
21 *
22 *
23 */
24
25 #ifdef HAVE_CONFIG_H
26 #include <raptor_config.h>
27 #endif
28
29 #ifdef WIN32
30 #include <win32_raptor_config.h>
31 #endif
32
33 #include <stdio.h>
34 #include <string.h>
35 #include <ctype.h>
36 #include <stdarg.h>
37 #ifdef HAVE_ERRNO_H
38 #include <errno.h>
39 #endif
40 #ifdef HAVE_STDLIB_H
41 #include <stdlib.h>
42 #endif
43
44 /* Raptor includes */
45 #include "raptor.h"
46 #include "raptor_internal.h"
47 #include "raptor_rss.h"
48
49
50 typedef struct {
51 raptor_world* world;
52 /* owned by this: URI OR bnode if starts with _: */
53 raptor_uri* uri;
54 /* shared pointer */
55 raptor_rss_item* item;
56 } raptor_rss_group_map;
57
58
59
60 /*
61 * Raptor 'RSS 1.0' serializer object
62 */
63 typedef struct {
64 raptor_world* world;
65
66 /* static rss model */
67 raptor_rss_model model;
68
69 /* Triples with no assigned type node */
70 raptor_sequence *triples;
71
72 /* Sequence of raptor_rss_item* : rdf:Seq items rdf:_<n> at offset n */
73 raptor_sequence *items;
74
75 /* Sequence of raptor_rss_item* (?x rdf:type rss:Enclosure) */
76 raptor_sequence *enclosures;
77
78 /* URI of rdf:Seq node */
79 raptor_uri *seq_uri;
80
81 /* Namespace stack for serializing */
82 raptor_namespace_stack *nstack;
83
84 /* the default namespace (rdf: or atom:) -
85 * this is destroyed when nstack above is deleted
86 */
87 raptor_namespace* default_nspace;
88
89 /* the xml: namespace */
90 raptor_namespace *xml_nspace;
91
92 /* the root element (rdf:RDF or atom:feed) */
93 raptor_xml_element* root_element;
94
95 /* where the xml is being written */
96 raptor_xml_writer *xml_writer;
97
98 /* non-0 if this is an atom 1.0 serializer */
99 int is_atom;
100
101 /* 0 = none
102 * 1 = existing rss:item item containg rdf/xml encoding of any extra
103 * triples about URI (rss-1.0 serializer only)
104 * 2 = at:md element containing rdf/xml property elements encoding
105 * of any extra triples about URI (atom serializer only)
106 */
107 int rss_triples_mode;
108
109 /* namespaces declared here */
110 raptor_namespace* nspaces[RAPTOR_RSS_NAMESPACES_SIZE];
111
112 /* Map of group URI (key, owned) : rss item object (value, shared) */
113 raptor_avltree *group_map;
114
115 /* User declared namespaces */
116 raptor_sequence *user_namespaces;
117
118 /* URI of XML Literal datatype */
119 raptor_uri* xml_literal_dt;
120
121 int free_default_nspace;
122 } raptor_rss10_serializer_context;
123
124
125 static void
raptor_free_group_map(raptor_rss_group_map * gm)126 raptor_free_group_map(raptor_rss_group_map* gm)
127 {
128 if(gm->uri)
129 raptor_free_uri_v2(gm->world, gm->uri);
130 RAPTOR_FREE(raptor_rss_group_map, gm);
131 }
132
133
134 static int
raptor_rss_group_map_compare(raptor_rss_group_map * gm1,raptor_rss_group_map * gm2)135 raptor_rss_group_map_compare(raptor_rss_group_map* gm1,
136 raptor_rss_group_map* gm2)
137 {
138 return raptor_uri_compare_v2(gm1->world, gm1->uri, gm2->uri);
139 }
140
141
142 static raptor_rss_item*
raptor_rss10_get_group_item(raptor_rss10_serializer_context * rss_serializer,raptor_uri * uri)143 raptor_rss10_get_group_item(raptor_rss10_serializer_context *rss_serializer,
144 raptor_uri* uri)
145 {
146 raptor_rss_group_map search_gm;
147 raptor_rss_group_map* gm;
148
149 search_gm.world=rss_serializer->world;
150 search_gm.uri=uri;
151 gm=(raptor_rss_group_map*)raptor_avltree_search(rss_serializer->group_map,
152 (void*)&search_gm);
153
154 return gm ? gm->item : NULL;
155 }
156
157
158 static int
raptor_rss10_set_item_group(raptor_rss10_serializer_context * rss_serializer,raptor_uri * uri,raptor_rss_item * item)159 raptor_rss10_set_item_group(raptor_rss10_serializer_context *rss_serializer,
160 raptor_uri* uri, raptor_rss_item *item)
161 {
162 raptor_rss_group_map* gm;
163
164 if(raptor_rss10_get_group_item(rss_serializer, uri))
165 return 0;
166
167 gm=(raptor_rss_group_map*)RAPTOR_CALLOC(raptor_rss_group_map, 1,
168 sizeof(raptor_rss_group_map));
169 gm->world=rss_serializer->world;
170 gm->uri=raptor_uri_copy_v2(rss_serializer->world, uri);
171 gm->item=item;
172
173 raptor_avltree_add(rss_serializer->group_map, gm);
174 return 0;
175 }
176
177
178 /**
179 * raptor_rss10_serialize_init:
180 * @serializer: serializer object
181 * @name: serializer name
182 *
183 * INTERNAL (raptor_serializer_factory API) - create a new serializer
184 *
185 * Return value: non-0 on failure
186 */
187 static int
raptor_rss10_serialize_init(raptor_serializer * serializer,const char * name)188 raptor_rss10_serialize_init(raptor_serializer* serializer, const char *name)
189 {
190 raptor_rss10_serializer_context *rss_serializer=(raptor_rss10_serializer_context*)serializer->context;
191
192 rss_serializer->world=serializer->world;
193
194 raptor_rss_common_init(serializer->world);
195 raptor_rss_model_init(serializer->world, &rss_serializer->model);
196
197 rss_serializer->triples=raptor_new_sequence((raptor_sequence_free_handler*)raptor_free_statement_v2, (raptor_sequence_print_handler*)raptor_print_statement_v2);
198
199 rss_serializer->items=raptor_new_sequence((raptor_sequence_free_handler*)raptor_free_rss_item, (raptor_sequence_print_handler*)NULL);
200
201 rss_serializer->enclosures=raptor_new_sequence((raptor_sequence_free_handler*)raptor_free_rss_item, (raptor_sequence_print_handler*)NULL);
202
203 rss_serializer->group_map=raptor_new_avltree(serializer->world,
204 (raptor_data_compare_function)raptor_rss_group_map_compare,
205 (raptor_data_free_function)raptor_free_group_map, 0);
206
207 rss_serializer->user_namespaces=raptor_new_sequence((raptor_sequence_free_handler*)raptor_free_namespace, NULL);
208
209 rss_serializer->is_atom=!(strcmp(name,"atom"));
210
211 rss_serializer->nstack=raptor_new_namespaces_v2(serializer->world,
212 (raptor_simple_message_handler)raptor_serializer_simple_error,
213 serializer,
214 1);
215
216 rss_serializer->xml_literal_dt=raptor_new_uri_v2(serializer->world, raptor_xml_literal_datatype_uri_string);
217
218 return 0;
219 }
220
221
222 /**
223 * raptor_rss10_serialize_terminate:
224 * @serializer: serializer object
225 *
226 * INTERNAL (raptor_serializer_factory API) - destroy a serializer
227 */
228 static void
raptor_rss10_serialize_terminate(raptor_serializer * serializer)229 raptor_rss10_serialize_terminate(raptor_serializer* serializer)
230 {
231 raptor_rss10_serializer_context *rss_serializer=(raptor_rss10_serializer_context*)serializer->context;
232 int i;
233 raptor_world* world=serializer->world;
234
235 raptor_rss_model_clear(&rss_serializer->model);
236 raptor_rss_common_terminate(world);
237
238 if(rss_serializer->triples)
239 raptor_free_sequence(rss_serializer->triples);
240
241 if(rss_serializer->items)
242 raptor_free_sequence(rss_serializer->items);
243
244 if(rss_serializer->enclosures)
245 raptor_free_sequence(rss_serializer->enclosures);
246
247 if(rss_serializer->seq_uri)
248 raptor_free_uri_v2(rss_serializer->world, rss_serializer->seq_uri);
249
250 if(rss_serializer->xml_writer)
251 raptor_free_xml_writer(rss_serializer->xml_writer);
252
253 for(i=0; i<RAPTOR_RSS_NAMESPACES_SIZE;i++) {
254 if(rss_serializer->nspaces[i])
255 raptor_free_namespace(rss_serializer->nspaces[i]);
256 }
257
258 if(rss_serializer->free_default_nspace && rss_serializer->default_nspace)
259 raptor_free_namespace(rss_serializer->default_nspace);
260
261 if(rss_serializer->xml_nspace)
262 raptor_free_namespace(rss_serializer->xml_nspace);
263
264 if(rss_serializer->user_namespaces)
265 raptor_free_sequence(rss_serializer->user_namespaces);
266
267 /* all raptor_namespace* objects must be freed BEFORE the stack
268 * they are attached to here: */
269 if(rss_serializer->nstack)
270 raptor_free_namespaces(rss_serializer->nstack);
271
272 if(rss_serializer->group_map)
273 raptor_free_avltree(rss_serializer->group_map);
274
275 if(world->rss_fields_info_qnames) {
276 for(i=0; i< RAPTOR_RSS_FIELDS_SIZE; i++) {
277 if(world->rss_fields_info_qnames[i])
278 raptor_free_qname(world->rss_fields_info_qnames[i]);
279 }
280 RAPTOR_FREE(raptor_qname* array, world->rss_fields_info_qnames);
281 world->rss_fields_info_qnames=NULL;
282 }
283
284 if(world->rss_types_info_qnames) {
285 for(i=0; i< RAPTOR_RSS_COMMON_SIZE; i++) {
286 if(world->rss_types_info_qnames[i])
287 raptor_free_qname(world->rss_types_info_qnames[i]);
288 }
289 RAPTOR_FREE(raptor_wname* array, world->rss_types_info_qnames);
290 world->rss_types_info_qnames=NULL;
291 }
292
293 if(rss_serializer->xml_literal_dt)
294 raptor_free_uri_v2(rss_serializer->world, rss_serializer->xml_literal_dt);
295 }
296
297
298 /**
299 * raptor_rss10_move_statements:
300 * @rss_serializer: serializer object
301 * @type: item type
302 * @item: item object
303 *
304 * INTERNAL - Move statements from the stored triples into item @item
305 * that match @item's URI as subject.
306 *
307 * Return value: count of number of triples moved
308 */
309 static int
raptor_rss10_move_statements(raptor_rss10_serializer_context * rss_serializer,raptor_rss_type type,raptor_rss_item * item)310 raptor_rss10_move_statements(raptor_rss10_serializer_context *rss_serializer,
311 raptor_rss_type type,
312 raptor_rss_item *item)
313 {
314 int t;
315 int count=0;
316 int is_atom=rss_serializer->is_atom;
317
318 for(t=0; t< raptor_sequence_size(rss_serializer->triples); t++) {
319 raptor_statement_v2* s;
320 int f;
321
322 s=(raptor_statement_v2*)raptor_sequence_get_at(rss_serializer->triples, t);
323 if(!s)
324 continue;
325
326 if(s->s->subject_type != RAPTOR_IDENTIFIER_TYPE_RESOURCE ||
327 !raptor_uri_equals_v2(rss_serializer->world, (raptor_uri*)s->s->subject, item->uri))
328 continue;
329
330 /* now we know this triple is associated with the item URI
331 * and can count the relevant triples */
332 count++;
333
334 /* add triples with anonymous object to the general triples sequence
335 * for this item, and to the group map (blank node closure)
336 */
337 if(s->s->object_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
338 raptor_uri* fake_uri=raptor_new_uri_v2(rss_serializer->world, (const unsigned char*)s->s->object);
339 raptor_rss10_set_item_group(rss_serializer, fake_uri, item);
340 raptor_free_uri_v2(rss_serializer->world, fake_uri);
341
342 RAPTOR_DEBUG4("Moved anonymous value property URI <%s> for typed node %i - %s\n",
343 raptor_uri_as_string_v2(rss_serializer->world, (raptor_uri*)s->s->predicate),
344 type, raptor_rss_items_info[type].name);
345 s=(raptor_statement_v2*)raptor_sequence_delete_at(rss_serializer->triples,
346 t);
347 raptor_sequence_push(item->triples, s);
348 continue;
349 }
350
351
352 /* otherwise process object value types resource or literal */
353 for(f=0; f < RAPTOR_RSS_FIELDS_SIZE; f++) {
354 if(!rss_serializer->world->rss_fields_info_uris[f])
355 continue;
356
357 if((s->s->predicate_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE ||
358 s->s->predicate_type == RAPTOR_IDENTIFIER_TYPE_PREDICATE) &&
359 s->s->object_type != RAPTOR_IDENTIFIER_TYPE_ANONYMOUS &&
360 raptor_uri_equals_v2(rss_serializer->world,
361 (raptor_uri*)s->s->predicate,
362 rss_serializer->world->rss_fields_info_uris[f])) {
363 raptor_rss_field* field=raptor_rss_new_field(rss_serializer->world);
364
365 /* found field this triple to go in 'item' so move the
366 * object value over
367 */
368 if(s->s->object_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE)
369 field->uri=(raptor_uri*)s->s->object;
370 else {
371 field->value=(unsigned char*)s->s->object;
372 if(s->s->object_literal_datatype &&
373 raptor_uri_equals_v2(rss_serializer->world,
374 s->s->object_literal_datatype,
375 rss_serializer->xml_literal_dt))
376 field->is_xml=1;
377 if(f == RAPTOR_RSS_FIELD_CONTENT_ENCODED)
378 field->is_xml=1;
379 if(f == RAPTOR_RSS_FIELD_ATOM_SUMMARY && *field->value == '<')
380 field->is_xml=1;
381 }
382 s->s->object=NULL;
383
384 if(is_atom) {
385 int i;
386
387 /* Rewrite item fields rss->atom */
388 for(i=0; raptor_atom_to_rss[i].from != RAPTOR_RSS_FIELD_UNKNOWN; i++) {
389 int from_f=raptor_atom_to_rss[i].to;
390 int to_f=raptor_atom_to_rss[i].from;
391
392 /* Do not rewrite to atom0.3 terms */
393 if(raptor_rss_fields_info[to_f].nspace == ATOM0_3_NS)
394 continue;
395
396 if(f == from_f &&
397 !(item->fields[to_f] && item->fields[to_f]->value)) {
398 f= to_f;
399 if(to_f == RAPTOR_RSS_FIELD_ATOM_SUMMARY && *field->value == '<')
400 field->is_xml=1;
401 field->is_mapped=1;
402 RAPTOR_DEBUG5("Moved field %d - %s to field %d - %s\n", from_f, raptor_rss_fields_info[from_f].name, to_f, raptor_rss_fields_info[to_f].name);
403 break;
404 }
405 }
406 } /* end is atom field to map */
407
408 RAPTOR_DEBUG1("Adding field\n");
409 raptor_rss_item_add_field(item, f, field);
410 raptor_sequence_set_at(rss_serializer->triples, t, NULL);
411 break;
412 }
413 } /* end for field loop */
414
415 /* loop ended early so triple was assocated with a field - continue */
416 if(f < RAPTOR_RSS_FIELDS_SIZE)
417 continue;
418
419
420 /* otherwise triple was not found as a field so store in triples
421 * sequence
422 */
423 RAPTOR_DEBUG4("UNKNOWN property URI <%s> for typed node %i - %s\n",
424 raptor_uri_as_string_v2(rss_serializer->world, (raptor_uri*)s->s->predicate),
425 type, raptor_rss_items_info[type].name);
426 s=(raptor_statement_v2*)raptor_sequence_delete_at(rss_serializer->triples,
427 t);
428 raptor_sequence_push(item->triples, s);
429
430 } /* end for all triples */
431
432 #ifdef RAPTOR_DEBUG
433 if(count > 0)
434 RAPTOR_DEBUG5("Moved %d triples to typed node %i - %s with uri <%s>\n",
435 count, type, raptor_rss_items_info[type].name,
436 raptor_uri_as_string_v2(rss_serializer->world, (raptor_uri*)item->uri));
437 #endif
438
439 return count;
440 }
441
442
443 /**
444 * raptor_rss10_move_anonymous_statements:
445 * @rss_serializer: serializer object
446 *
447 * INTERNAL - Move statements with a blank node subject to the appropriate item
448 *
449 */
450 static int
raptor_rss10_move_anonymous_statements(raptor_rss10_serializer_context * rss_serializer)451 raptor_rss10_move_anonymous_statements(raptor_rss10_serializer_context *rss_serializer)
452 {
453 int t;
454 int handled=1;
455 int round=0;
456 #ifdef RAPTOR_DEBUG
457 int moved_count=0;
458 #endif
459
460 for(round=0; handled; round++) {
461 handled=0;
462
463 for(t=0; t< raptor_sequence_size(rss_serializer->triples); t++) {
464 raptor_statement_v2* s;
465 raptor_uri* fake_uri;
466 raptor_rss_item* item;
467
468 s=(raptor_statement_v2*)raptor_sequence_get_at(rss_serializer->triples, t);
469 if(!s)
470 continue;
471
472 if(s->s->subject_type != RAPTOR_IDENTIFIER_TYPE_ANONYMOUS)
473 continue;
474
475 fake_uri=raptor_new_uri_v2(rss_serializer->world, (const unsigned char*)s->s->subject);
476 item=raptor_rss10_get_group_item(rss_serializer, fake_uri);
477 raptor_free_uri_v2(rss_serializer->world, fake_uri);
478
479 if(item) {
480 /* triple matched an existing item */
481 s=(raptor_statement_v2*)raptor_sequence_delete_at(rss_serializer->triples,
482 t);
483 raptor_sequence_push(item->triples, s);
484 #ifdef RAPTOR_DEBUG
485 moved_count++;
486 #endif
487
488 if(s->s->object_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
489 fake_uri=raptor_new_uri_v2(rss_serializer->world, (const unsigned char*)s->s->object);
490 raptor_rss10_set_item_group(rss_serializer, fake_uri, item);
491 raptor_free_uri_v2(rss_serializer->world, fake_uri);
492 }
493
494
495 handled=1;
496 }
497 } /* end for all triples */
498
499 #ifdef RAPTOR_DEBUG
500 if(moved_count > 0)
501 RAPTOR_DEBUG3("Round %d: Moved %d triples\n", round, moved_count);
502 #endif
503 }
504
505 return 0;
506 }
507
508
509 /**
510 * raptor_rss10_move_leftover_statements:
511 * @rss_serializer: serializer object
512 *
513 * INTERNAL - Move any statements in the serializer pool to items or channel
514 *
515 */
516 static int
raptor_rss10_move_leftover_statements(raptor_rss10_serializer_context * rss_serializer)517 raptor_rss10_move_leftover_statements(raptor_rss10_serializer_context *rss_serializer) {
518 raptor_rss_model* rss_model;
519 int i;
520 int type;
521 raptor_rss_item* item;
522
523 rss_model=&rss_serializer->model;
524
525 type=RAPTOR_RSS_ITEM;
526 for(i=0; i < raptor_sequence_size(rss_serializer->items); i++) {
527 item=(raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i);
528 raptor_rss10_move_statements(rss_serializer, (raptor_rss_type)type, item);
529 }
530
531 type=RAPTOR_RSS_CHANNEL;
532 if(rss_model->common[type]) {
533 item=rss_model->common[type];
534 raptor_rss10_move_statements(rss_serializer, (raptor_rss_type)type, item);
535 }
536
537 return 0;
538 }
539
540
541 /**
542 * raptor_rss10_remove_mapped_item_fields:
543 * @rss_serializer: serializer object
544 * @item: rss item
545 * @type: item type
546 *
547 * INTERNAL - Remove mapped fields for an item
548 *
549 */
550 static int
raptor_rss10_remove_mapped_item_fields(raptor_rss10_serializer_context * rss_serializer,raptor_rss_item * item,int type)551 raptor_rss10_remove_mapped_item_fields(raptor_rss10_serializer_context *rss_serializer,
552 raptor_rss_item* item, int type)
553 {
554 int f;
555
556 if(!item->fields_count)
557 return 0;
558
559 for(f=0; f < RAPTOR_RSS_FIELDS_SIZE; f++) {
560 raptor_rss_field* field;
561 int saw_mapped=0;
562 int saw_non_mapped=0;
563
564 for (field=item->fields[f]; field; field=field->next) {
565 if(field->is_mapped)
566 saw_mapped++;
567 else
568 saw_non_mapped++;
569 }
570
571 if(saw_mapped && saw_non_mapped) {
572 raptor_rss_field* last_field=NULL;
573 RAPTOR_DEBUG6("Item %p Field %d - %s: %d mapped %d non-mapped\n", item, f, raptor_rss_fields_info[f].name, saw_mapped, saw_non_mapped);
574
575 field=item->fields[f];
576 while(field) {
577 raptor_rss_field* next=field->next;
578 field->next=NULL;
579 if(field->is_mapped)
580 raptor_rss_field_free(field);
581 else {
582 if(!last_field)
583 item->fields[f]=field;
584 else
585 last_field->next=field;
586 last_field=field;
587 }
588 field=next;
589 }
590 }
591
592 }
593
594 return 0;
595 }
596
597
598 /**
599 * raptor_rss10_remove_mapped_fields:
600 * @rss_serializer: serializer object
601 *
602 * INTERNAL - Move statements with a blank node subject to the appropriate item
603 *
604 */
605 static int
raptor_rss10_remove_mapped_fields(raptor_rss10_serializer_context * rss_serializer)606 raptor_rss10_remove_mapped_fields(raptor_rss10_serializer_context *rss_serializer)
607 {
608 raptor_rss_model* rss_model;
609 int is_atom;
610 int i;
611
612 rss_model=&rss_serializer->model;
613 is_atom=rss_serializer->is_atom;
614
615 if(!is_atom)
616 return 0;
617
618 if(rss_model->items_count) {
619 for(i=0; i < raptor_sequence_size(rss_serializer->items); i++) {
620 raptor_rss_item* item;
621 item=(raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i);
622 raptor_rss10_remove_mapped_item_fields(rss_serializer, item,
623 RAPTOR_RSS_ITEM);
624 }
625 }
626
627 for(i=RAPTOR_RSS_CHANNEL; i< RAPTOR_RSS_COMMON_SIZE; i++) {
628 raptor_rss_item* item;
629 for (item=rss_model->common[i]; item; item=item->next) {
630 raptor_rss10_remove_mapped_item_fields(rss_serializer, item, i);
631 }
632 }
633
634 return 0;
635 }
636
637 /**
638 * raptor_rss10_store_statement:
639 * @rss_serializer: serializer object
640 * @s: statement
641 *
642 * INTERNAL - decide where to store a statement in an item or keep pending
643 *
644 * Return value: non-0 if handled (stored)
645 */
646 static int
raptor_rss10_store_statement(raptor_rss10_serializer_context * rss_serializer,raptor_statement_v2 * s)647 raptor_rss10_store_statement(raptor_rss10_serializer_context *rss_serializer,
648 raptor_statement_v2 *s)
649 {
650 raptor_rss_item *item=NULL;
651 int handled=0;
652 int is_atom=rss_serializer->is_atom;
653 raptor_uri* fake_uri;
654
655 fake_uri=raptor_new_uri_v2(rss_serializer->world, (const unsigned char*)s->s->subject);
656 item=raptor_rss10_get_group_item(rss_serializer, fake_uri);
657 raptor_free_uri_v2(rss_serializer->world, fake_uri);
658
659 if(item && s->s->object_type != RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
660 int f;
661
662 for(f=0; f < RAPTOR_RSS_FIELDS_SIZE; f++) {
663 raptor_rss_field* field;
664 if(!rss_serializer->world->rss_fields_info_uris[f])
665 continue;
666
667 if((s->s->predicate_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE ||
668 s->s->predicate_type == RAPTOR_IDENTIFIER_TYPE_PREDICATE) &&
669 raptor_uri_equals_v2(rss_serializer->world,
670 (raptor_uri*)s->s->predicate,
671 rss_serializer->world->rss_fields_info_uris[f])) {
672 /* found field this triple to go in 'item' so move the
673 * object value over
674 */
675 field=raptor_rss_new_field(rss_serializer->world);
676 if(s->s->object_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE) {
677 field->uri=(raptor_uri*)s->s->object;
678 } else {
679 field->value=(unsigned char*)s->s->object;
680 if(s->s->object_literal_datatype &&
681 raptor_uri_equals_v2(rss_serializer->world,
682 s->s->object_literal_datatype,
683 rss_serializer->xml_literal_dt))
684 field->is_xml=1;
685 if(f == RAPTOR_RSS_FIELD_CONTENT_ENCODED)
686 field->is_xml=1;
687 if(f == RAPTOR_RSS_FIELD_ATOM_SUMMARY && *field->value == '<')
688 field->is_xml=1;
689 }
690 s->s->object=NULL;
691
692 if(is_atom) {
693 int i;
694
695 /* Rewrite item fields rss->atom */
696 for(i=0; raptor_atom_to_rss[i].from != RAPTOR_RSS_FIELD_UNKNOWN; i++) {
697 int from_f=raptor_atom_to_rss[i].to;
698 int to_f=raptor_atom_to_rss[i].from;
699
700 /* Do not rewrite to atom0.3 terms */
701 if(raptor_rss_fields_info[to_f].nspace == ATOM0_3_NS)
702 continue;
703
704 if(f == from_f &&
705 !(item->fields[to_f] && item->fields[to_f]->value)) {
706 f= to_f;
707 if(to_f == RAPTOR_RSS_FIELD_ATOM_SUMMARY && *field->value == '<')
708 field->is_xml=1;
709 field->is_mapped=1;
710 RAPTOR_DEBUG5("Moved field %d - %s to field %d - %s\n", from_f, raptor_rss_fields_info[from_f].name, to_f, raptor_rss_fields_info[to_f].name);
711 break;
712 }
713 }
714 }
715
716 RAPTOR_DEBUG1("Adding field\n");
717 raptor_rss_item_add_field(item, f, field);
718 raptor_free_statement_v2(s);
719 #if RAPTOR_DEBUG > 1
720 RAPTOR_DEBUG2("Stored statement under typed node %p\n", item);
721 #endif
722
723 handled=1;
724 break;
725 }
726 }
727 }
728
729 if(!handled) {
730 raptor_sequence_push(rss_serializer->triples, s);
731 #if RAPTOR_DEBUG > 1
732 fprintf(stderr,"Stored statement: ");
733 raptor_print_statement_as_ntriples_v2(s, stderr);
734 fprintf(stderr,"\n");
735 #endif
736 handled=1;
737 }
738
739 return handled;
740 }
741
742
743 static int
raptor_rss10_serialize_start(raptor_serializer * serializer)744 raptor_rss10_serialize_start(raptor_serializer* serializer)
745 {
746 raptor_rss10_serializer_context *rss_serializer=(raptor_rss10_serializer_context*)serializer->context;
747
748 if(serializer->feature_rss_triples) {
749 if(!strcmp((const char*)serializer->feature_rss_triples,
750 "none"))
751 rss_serializer->rss_triples_mode=0;
752 else if(!strcmp((const char*)serializer->feature_rss_triples,
753 "rdf-xml"))
754 rss_serializer->rss_triples_mode=1;
755 else if(!strcmp((const char*)serializer->feature_rss_triples,
756 "atom-triples"))
757 rss_serializer->rss_triples_mode=2;
758 else
759 rss_serializer->rss_triples_mode=0;
760 }
761
762 return 0;
763 }
764
765
766 /**
767 * raptor_rss10_serialize_statement:
768 * @serializer: serializer object
769 * @statement: statement
770 *
771 * INTERNAL (raptor_serializer_factory API) - Serialize a statement
772 *
773 * Return value: non-0 on failure
774 */
775 static int
raptor_rss10_serialize_statement(raptor_serializer * serializer,const raptor_statement * statement)776 raptor_rss10_serialize_statement(raptor_serializer* serializer,
777 const raptor_statement *statement)
778 {
779 raptor_rss10_serializer_context *rss_serializer;
780 raptor_rss_model *rss_model;
781 int handled = 0;
782 int i;
783 raptor_rss_type type;
784 raptor_rss_item *item = NULL;
785
786 rss_serializer = (raptor_rss10_serializer_context*)serializer->context;
787 rss_model = &rss_serializer->model;
788
789 #if RAPTOR_DEBUG > 1
790 if(1) {
791 raptor_statement_v2 s2;
792 RAPTOR_DEBUG1("Processing statement\n ");
793 s2.s = (raptor_statement*)statement;
794 s2.world = rss_serializer->world;
795 raptor_print_statement_as_ntriples_v2(&s2, stderr);
796 fputc('\n', stderr);
797 }
798 #endif
799
800 if(raptor_uri_equals_v2(rss_serializer->world,
801 (raptor_uri*)statement->predicate,
802 RAPTOR_RSS_RSS_items_URI(rss_model))) {
803 /* ignore any triple (? rss:items ?) - is infered */
804 return 0;
805 }
806
807 if(!raptor_uri_equals_v2(rss_serializer->world,
808 (raptor_uri*)statement->predicate,
809 RAPTOR_RSS_RDF_type_URI(rss_model)))
810 goto savetriple;
811
812
813 /* Look for triple (?resource rdf:type rdf:Seq) */
814 if(statement->object_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE &&
815 raptor_uri_equals_v2(rss_serializer->world,
816 (raptor_uri*)statement->object,
817 RAPTOR_RSS_RDF_Seq_URI(rss_model))) {
818
819 if(statement->subject_type==RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
820 RAPTOR_DEBUG2("Saw rdf:Seq with blank node %s\n",
821 (char*)statement->subject);
822 rss_serializer->seq_uri = raptor_new_uri_v2(rss_serializer->world,
823 (unsigned char*)statement->subject);
824 } else {
825 RAPTOR_DEBUG2("Saw rdf:Seq with URI <%s>\n",
826 raptor_uri_as_string_v2(rss_serializer->world,
827 (raptor_uri*)statement->subject));
828 rss_serializer->seq_uri=raptor_uri_copy_v2(rss_serializer->world,
829 rss_serializer->seq_uri);
830 }
831
832 handled = 1;
833 goto savetriple;
834 }
835
836
837 /* look for triple: (? rdf:type ?) to find containers and blocks */
838 type = RAPTOR_RSS_NONE;
839 for(i = 0; i < RAPTOR_RSS_COMMON_SIZE; i++) {
840 raptor_uri *item_uri = serializer->world->rss_types_info_uris[i];
841 if(item_uri &&
842 raptor_uri_equals_v2(rss_serializer->world,
843 (raptor_uri*)statement->object, item_uri)) {
844 type = (raptor_rss_type)i;
845 RAPTOR_DEBUG4("Found typed node %i - %s with URI <%s>\n", type,
846 raptor_rss_items_info[type].name,
847 raptor_uri_as_string_v2(rss_serializer->world,
848 (raptor_uri*)statement->subject));
849 break;
850 }
851 }
852
853 if(type == RAPTOR_RSS_NONE) {
854 RAPTOR_DEBUG2("UNKNOWN typed node with type URI <%s>\n",
855 raptor_uri_as_string_v2(rss_serializer->world,
856 (raptor_uri*)statement->object));
857 goto savetriple;
858 }
859
860
861 if(type == RAPTOR_RSS_ITEM) {
862 for(i = 0; i < raptor_sequence_size(rss_serializer->items); i++) {
863 item = (raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i);
864 if(raptor_rss_item_equals_statement_subject(item, statement))
865 break;
866
867 }
868 if(i < raptor_sequence_size(rss_serializer->items)) {
869 RAPTOR_DEBUG2("Found RSS item at entry %d in sequence of items\n", i);
870 } else {
871 RAPTOR_DEBUG2("RSS item URI <%s> is not in sequence of items\n",
872 raptor_uri_as_string_v2(rss_serializer->world,
873 (raptor_uri*)statement->subject));
874 item = NULL;
875 }
876 } else if(type == RAPTOR_RSS_ENCLOSURE) {
877 for(i = 0; i < raptor_sequence_size(rss_serializer->enclosures); i++) {
878 item = (raptor_rss_item*)raptor_sequence_get_at(rss_serializer->enclosures, i);
879 if(raptor_rss_item_equals_statement_subject(item, statement))
880 break;
881
882 }
883 if(i < raptor_sequence_size(rss_serializer->items)) {
884 RAPTOR_DEBUG2("Found enclosure at entry %d in sequence of enclosures\n", i);
885 } else {
886 RAPTOR_DEBUG2("Add new enclosure to sequence with URI <%s>\n",
887 raptor_uri_as_string_v2(rss_serializer->world,
888 (raptor_uri*)statement->subject));
889
890 item = raptor_new_rss_item(rss_serializer->world);
891 raptor_sequence_push(rss_serializer->enclosures, item);
892 }
893 } else {
894 item=raptor_rss_model_add_common(rss_model, type);
895 }
896
897
898 if(item) {
899 raptor_rss_item_set_uri(item, (raptor_uri*)statement->subject);
900
901 /* Move any existing statements to the newly discovered item */
902 raptor_rss10_move_statements(rss_serializer, type, item);
903
904 raptor_rss10_set_item_group(rss_serializer, item->uri, item);
905
906 handled = 1;
907 }
908
909
910 savetriple:
911 if(!handled) {
912 raptor_statement_v2 *t;
913 t = raptor_statement_copy_v2_from_v1(rss_serializer->world, statement);
914 if(t)
915 handled = raptor_rss10_store_statement(rss_serializer, t);
916 }
917
918 return 0;
919 }
920
921
922 static void
raptor_rss10_build_items(raptor_rss10_serializer_context * rss_serializer)923 raptor_rss10_build_items(raptor_rss10_serializer_context *rss_serializer)
924 {
925 raptor_rss_model* rss_model=&rss_serializer->model;
926 int i;
927
928 if(!rss_serializer->seq_uri)
929 return;
930
931 for(i=0; i < raptor_sequence_size(rss_serializer->triples); i++) {
932 int ordinal= -1;
933 raptor_uri* fake_uri=NULL;
934 raptor_statement_v2* s;
935
936 s=(raptor_statement_v2*)raptor_sequence_get_at(rss_serializer->triples, i);
937 if(!s)
938 continue;
939
940 #if RAPTOR_DEBUG > 1
941 RAPTOR_DEBUG1("Processing statement\n ");
942 raptor_print_statement_as_ntriples_v2(s, stderr);
943 fputc('\n', stderr);
944 #endif
945
946 /* skip triples that are not ? ? <uri> */
947 if(s->s->object_type != RAPTOR_IDENTIFIER_TYPE_RESOURCE) {
948 RAPTOR_DEBUG1("Not ? ? <uri> - continuing\n");
949 continue;
950 }
951
952
953 if(s->s->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS)
954 fake_uri=raptor_new_uri_v2(rss_serializer->world, (unsigned char*)s->s->subject);
955 else
956 fake_uri=raptor_uri_copy_v2(rss_serializer->world, (raptor_uri*)s->s->subject);
957
958 if(raptor_uri_equals_v2(rss_serializer->world, fake_uri, rss_serializer->seq_uri)) {
959 /* found <seq URI> <some predicate> <some URI> triple */
960
961 if(s->s->predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL)
962 ordinal= *((int*)s->s->predicate);
963 else { /* predicate is a resource */
964 const unsigned char* uri_str;
965 uri_str= raptor_uri_as_string_v2(rss_serializer->world, (raptor_uri*)s->s->predicate);
966 if(!strncmp((const char*)uri_str, "http://www.w3.org/1999/02/22-rdf-syntax-ns#_", 44))
967 ordinal= raptor_check_ordinal(uri_str+44);
968 }
969 RAPTOR_DEBUG3("Found RSS 1.0 item %d with URI <%s>\n", ordinal,
970 raptor_uri_as_string_v2(rss_serializer->world, (raptor_uri*)s->s->object));
971
972 if(ordinal >= 0) {
973 raptor_rss_item *item;
974
975 item = raptor_new_rss_item(rss_serializer->world);
976
977 raptor_rss_item_set_uri(item, (raptor_uri*)s->s->object);
978
979 raptor_sequence_set_at(rss_serializer->items, ordinal-1, item);
980
981 raptor_sequence_set_at(rss_serializer->triples, i, NULL);
982
983 /* Move any existing statements to the newly discovered item */
984 raptor_rss10_move_statements(rss_serializer, RAPTOR_RSS_ITEM, item);
985
986 raptor_rss10_set_item_group(rss_serializer, item->uri, item);
987 }
988 }
989
990 raptor_free_uri_v2(rss_serializer->world, fake_uri);
991 }
992
993 rss_model->items_count=raptor_sequence_size(rss_serializer->items);
994 }
995
996
997 static void
raptor_rss10_build_xml_names(raptor_serializer * serializer,int is_entry)998 raptor_rss10_build_xml_names(raptor_serializer *serializer, int is_entry)
999 {
1000 raptor_rss10_serializer_context *rss_serializer=(raptor_rss10_serializer_context*)serializer->context;
1001 raptor_rss_model* rss_model=&rss_serializer->model;
1002 raptor_uri *base_uri=serializer->base_uri;
1003 raptor_xml_element *element;
1004 raptor_qname *qname;
1005 const unsigned char*root_local_name;
1006 int i;
1007 int is_atom=rss_serializer->is_atom;
1008 const raptor_rss_item_info *item_node_type;
1009 int item_node_typei;
1010 const unsigned char* ns_uri;
1011 raptor_world* world=serializer->world;
1012 int default_ns_id;
1013 const unsigned char *default_prefix;
1014
1015 if(is_atom) {
1016 default_ns_id = ATOM1_0_NS;
1017 ns_uri = raptor_atom_namespace_uri;
1018 root_local_name = (is_entry ? (const unsigned char*)"entry" :
1019 (const unsigned char*)"feed");
1020 item_node_typei = RAPTOR_ATOM_ENTRY;
1021 } else {
1022 default_ns_id = RSS1_0_NS;
1023 ns_uri = raptor_rdf_namespace_uri;
1024 root_local_name = (const unsigned char*)"RDF";
1025 item_node_typei = RAPTOR_RSS_ITEM;
1026 }
1027 item_node_type = &raptor_rss_items_info[item_node_typei];
1028
1029 if(serializer->feature_prefix_elements)
1030 /* declare this NS with standard prefix */
1031 default_prefix = (const unsigned char*)raptor_rss_namespaces_info[default_ns_id].prefix;
1032 else
1033 default_prefix = NULL;
1034
1035 rss_serializer->default_nspace = raptor_new_namespace(rss_serializer->nstack,
1036 default_prefix, ns_uri,
1037 0);
1038 rss_serializer->free_default_nspace = 1;
1039 if(serializer->feature_prefix_elements) {
1040 rss_serializer->nspaces[default_ns_id] = rss_serializer->default_nspace;
1041 rss_serializer->free_default_nspace = 0;
1042 }
1043
1044 rss_serializer->xml_nspace = raptor_new_namespace(rss_serializer->nstack,
1045 (const unsigned char*)"xml",
1046 (const unsigned char*)raptor_xml_namespace_uri,
1047 0);
1048
1049
1050 /* Now we have a namespace stack, declare the namespaces */
1051 for(i = 0; i < RAPTOR_RSS_NAMESPACES_SIZE; i++) {
1052 raptor_uri* uri = serializer->world->rss_namespaces_info_uris[i];
1053 const unsigned char *prefix;
1054
1055 prefix = (const unsigned char*)raptor_rss_namespaces_info[i].prefix;
1056 if(!prefix)
1057 continue;
1058
1059 if(i == default_ns_id) {
1060 if(serializer->feature_prefix_elements)
1061 prefix = NULL;
1062 }
1063
1064 if(uri) {
1065 raptor_namespace* nspace;
1066 nspace = raptor_new_namespace_from_uri(rss_serializer->nstack, prefix,
1067 uri, 0);
1068 rss_serializer->nspaces[i] = nspace;
1069 }
1070 }
1071
1072
1073 qname = raptor_new_qname_from_namespace_local_name_v2(serializer->world,
1074 rss_serializer->nspaces[default_ns_id],
1075 root_local_name,
1076 NULL);
1077 if(base_uri)
1078 base_uri = raptor_uri_copy_v2(rss_serializer->world, base_uri);
1079 element = raptor_new_xml_element(qname, NULL, base_uri);
1080 rss_serializer->root_element = element;
1081
1082
1083 /* Declare the namespaces on the root element */
1084 raptor_xml_element_declare_namespace(element, rss_serializer->default_nspace);
1085
1086 for(i = 0; i < RAPTOR_RSS_NAMESPACES_SIZE; i++) {
1087 const unsigned char *prefix;
1088
1089 prefix = (const unsigned char*)raptor_rss_namespaces_info[i].prefix;
1090 if(!prefix && i != default_ns_id)
1091 continue;
1092
1093 if(rss_serializer->nspaces[i])
1094 raptor_xml_element_declare_namespace(element, rss_serializer->nspaces[i]);
1095 }
1096 for(i = 0; i < raptor_sequence_size(rss_serializer->user_namespaces); i++) {
1097 raptor_namespace* nspace;
1098 nspace = (raptor_namespace*)raptor_sequence_get_at(rss_serializer->user_namespaces, i);
1099
1100 /* Ignore user setting default namespace prefix */
1101 if(!nspace->prefix)
1102 continue;
1103
1104 raptor_xml_element_declare_namespace(element, nspace);
1105 }
1106
1107
1108 world->rss_fields_info_qnames=(raptor_qname**)RAPTOR_CALLOC(raptor_qname* array, RAPTOR_RSS_FIELDS_SIZE, sizeof(raptor_qname*));
1109 if(!world->rss_fields_info_qnames)
1110 return;
1111 for(i=0; i< RAPTOR_RSS_FIELDS_SIZE; i++) {
1112 int n=raptor_rss_fields_info[i].nspace;
1113 raptor_namespace* nspace=rss_serializer->nspaces[n];
1114 world->rss_fields_info_qnames[i]=raptor_new_qname_from_namespace_local_name_v2(serializer->world,
1115 nspace,
1116 (const unsigned char*)raptor_rss_fields_info[i].name,
1117 NULL);
1118 if(!world->rss_fields_info_qnames[i])
1119 return;
1120 }
1121
1122 world->rss_types_info_qnames=(raptor_qname**)RAPTOR_CALLOC(raptor_qname* array, RAPTOR_RSS_COMMON_SIZE, sizeof(raptor_qname*));
1123 if(!world->rss_types_info_qnames)
1124 return;
1125 for(i=0; i< RAPTOR_RSS_COMMON_SIZE; i++) {
1126 int n=raptor_rss_items_info[i].nspace;
1127 raptor_namespace* nspace=rss_serializer->nspaces[n];
1128 if(nspace) {
1129 world->rss_types_info_qnames[i]=raptor_new_qname_from_namespace_local_name_v2(serializer->world,
1130 nspace,
1131 (const unsigned char*)raptor_rss_items_info[i].name,
1132 NULL);
1133 if(!world->rss_types_info_qnames[i])
1134 return;
1135 }
1136 }
1137
1138 for(i=0; i< RAPTOR_RSS_COMMON_SIZE; i++) {
1139 raptor_rss_item* item;
1140 for (item=rss_model->common[i]; item; item=item->next) {
1141 int typei=i;
1142 if(!item->fields_count)
1143 continue;
1144 if(is_atom) {
1145 if(typei == RAPTOR_RSS_CHANNEL)
1146 typei=RAPTOR_ATOM_FEED;
1147 else if(typei == RAPTOR_RSS_ITEM)
1148 typei=RAPTOR_ATOM_ENTRY;
1149 }
1150 item->node_type=&raptor_rss_items_info[typei];
1151 item->node_typei=typei;
1152 }
1153 }
1154
1155 for(i=0; i < raptor_sequence_size(rss_serializer->items); i++) {
1156 raptor_rss_item* item;
1157 item=(raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i);
1158 item->node_type=item_node_type;
1159 item->node_typei=item_node_typei;
1160 }
1161
1162 for(i=0; i < raptor_sequence_size(rss_serializer->enclosures); i++) {
1163 raptor_rss_item* item;
1164 item=(raptor_rss_item*)raptor_sequence_get_at(rss_serializer->enclosures, i);
1165 item->node_type=&raptor_rss_items_info[RAPTOR_RSS_ENCLOSURE];
1166 item->node_typei=RAPTOR_RSS_ENCLOSURE;
1167 }
1168
1169 }
1170
1171
1172 static void
raptor_rss10_emit_atom_triples_map(raptor_serializer * serializer,int is_feed,const unsigned char * map_element_name)1173 raptor_rss10_emit_atom_triples_map(raptor_serializer *serializer, int is_feed,
1174 const unsigned char* map_element_name)
1175 {
1176 raptor_rss10_serializer_context *rss_serializer=(raptor_rss10_serializer_context*)serializer->context;
1177 raptor_xml_writer* xml_writer;
1178 raptor_uri *base_uri=serializer->base_uri;
1179 raptor_uri* base_uri_copy=NULL;
1180 raptor_namespace* at_nspace=rss_serializer->nspaces[ATOMTRIPLES_NS];
1181 raptor_xml_element* at_map_root_element;
1182 raptor_qname *at_map_root_qname;
1183 int i;
1184
1185 xml_writer=rss_serializer->xml_writer;
1186
1187 at_map_root_qname=raptor_new_qname_from_namespace_local_name_v2(serializer->world, at_nspace,
1188 (const unsigned char*)map_element_name, NULL);
1189 base_uri_copy=base_uri ? raptor_uri_copy_v2(rss_serializer->world, base_uri) : NULL;
1190 at_map_root_element=raptor_new_xml_element(at_map_root_qname, NULL,
1191 base_uri_copy);
1192
1193 raptor_xml_writer_start_element(xml_writer, at_map_root_element);
1194
1195 /* Walk list of fields mapped atom to rss */
1196 for(i=0; raptor_atom_to_rss[i].from != RAPTOR_RSS_FIELD_UNKNOWN; i++) {
1197 int from_f=raptor_atom_to_rss[i].from;
1198 int to_f=raptor_atom_to_rss[i].to;
1199 const raptor_rss_field_info* from_field_info = &raptor_rss_fields_info[from_f];
1200 const raptor_rss_field_info* to_field_info = &raptor_rss_fields_info[to_f];
1201 raptor_xml_element* at_map_element;
1202 raptor_qname *at_map_qname;
1203 raptor_qname** at_map_attrs;
1204 const char* predicate_prefix;
1205 unsigned char* ruri_string;
1206
1207 /* Do not rewrite to atom0.3 terms */
1208 if(to_field_info->nspace == ATOM0_3_NS)
1209 continue;
1210
1211 /* atom:feed only contains some fields that are mapped */
1212 if(is_feed && !(from_f == RAPTOR_RSS_FIELD_ATOM_ID ||
1213 from_f == RAPTOR_RSS_FIELD_ATOM_UPDATED ||
1214 from_f == RAPTOR_RSS_FIELD_ATOM_RIGHTS ||
1215 from_f == RAPTOR_RSS_FIELD_ATOM_TITLE))
1216 continue;
1217
1218 predicate_prefix=raptor_rss_namespaces_info[from_field_info->nspace].prefix;
1219 if(!predicate_prefix)
1220 continue;
1221
1222 /* <at:map property="{property URI}">{atom element}</at:map> */
1223 at_map_qname=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1224 at_nspace,
1225 map_element_name,
1226 NULL);
1227 base_uri_copy=base_uri ? raptor_uri_copy_v2(rss_serializer->world, base_uri) : NULL;
1228 at_map_element=raptor_new_xml_element(at_map_qname, NULL, base_uri_copy);
1229
1230
1231 at_map_attrs=(raptor_qname **)RAPTOR_CALLOC(qnamearray, 1,
1232 sizeof(raptor_qname*));
1233 ruri_string=raptor_uri_to_relative_uri_string_v2(serializer->world, base_uri, serializer->world->rss_fields_info_uris[to_f]);
1234 at_map_attrs[0]=raptor_new_qname(rss_serializer->nstack,
1235 (const unsigned char*)"property",
1236 ruri_string,
1237 NULL, NULL); /* errors */
1238 raptor_free_memory(ruri_string);
1239 raptor_xml_element_set_attributes(at_map_element, at_map_attrs, 1);
1240
1241 raptor_xml_writer_start_element(xml_writer, at_map_element);
1242 raptor_xml_writer_cdata(xml_writer, (const unsigned char*)predicate_prefix);
1243 raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)":", 1);
1244 raptor_xml_writer_cdata(xml_writer,
1245 (const unsigned char*)from_field_info->name);
1246 raptor_xml_writer_end_element(xml_writer, at_map_element);
1247
1248 raptor_free_xml_element(at_map_element);
1249 }
1250
1251 raptor_xml_writer_end_element(xml_writer, at_map_root_element);
1252
1253 raptor_free_xml_element(at_map_root_element);
1254 }
1255
1256
1257
1258 /* atom-specific feed XML elements */
1259 static void
raptor_rss10_emit_atom_feed(raptor_serializer * serializer,raptor_rss_item * item)1260 raptor_rss10_emit_atom_feed(raptor_serializer *serializer,
1261 raptor_rss_item *item)
1262 {
1263 raptor_rss10_serializer_context *rss_serializer=(raptor_rss10_serializer_context*)serializer->context;
1264 raptor_xml_writer* xml_writer;
1265 raptor_uri *base_uri=serializer->base_uri;
1266 raptor_uri* base_uri_copy=NULL;
1267 raptor_xml_element* atom_link_element;
1268 raptor_qname *atom_link_qname;
1269 raptor_qname** atom_link_attrs;
1270 raptor_namespace* atom_nspace=rss_serializer->nspaces[ATOM1_0_NS];
1271 unsigned char* ruri_string;
1272
1273 xml_writer=rss_serializer->xml_writer;
1274
1275 atom_link_qname=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1276 atom_nspace,
1277 (const unsigned char*)"link",
1278 NULL);
1279 base_uri_copy=base_uri ? raptor_uri_copy_v2(rss_serializer->world, base_uri) : NULL;
1280 atom_link_element=raptor_new_xml_element(atom_link_qname, NULL, base_uri_copy);
1281
1282 atom_link_attrs=(raptor_qname **)RAPTOR_CALLOC(qnamearray, 2, sizeof(raptor_qname*));
1283 ruri_string = raptor_uri_to_relative_uri_string_v2(rss_serializer->world,
1284 base_uri, item->uri);
1285
1286 atom_link_attrs[0]=raptor_new_qname(rss_serializer->nstack,
1287 (const unsigned char*)"href",
1288 ruri_string,
1289 NULL, NULL); /* errors */
1290 raptor_free_memory(ruri_string);
1291 atom_link_attrs[1]=raptor_new_qname(rss_serializer->nstack,
1292 (const unsigned char*)"rel",
1293 (const unsigned char*)"self",
1294 NULL, NULL); /* errors */
1295 raptor_xml_element_set_attributes(atom_link_element, atom_link_attrs, 2);
1296
1297 raptor_xml_writer_empty_element(xml_writer, atom_link_element);
1298
1299 raptor_free_xml_element(atom_link_element);
1300
1301 if(rss_serializer->rss_triples_mode == 2) {
1302 raptor_rss10_emit_atom_triples_map(serializer, 1,
1303 (const unsigned char*)"feedmap");
1304 raptor_rss10_emit_atom_triples_map(serializer, 0,
1305 (const unsigned char*)"entrymap");
1306 }
1307 }
1308
1309
1310 /* emit the RSS 1.0-specific rdf:Seq and rss:item XML elements */
1311 static void
raptor_rss10_emit_rss_items(raptor_serializer * serializer)1312 raptor_rss10_emit_rss_items(raptor_serializer *serializer)
1313 {
1314 raptor_rss10_serializer_context *rss_serializer=(raptor_rss10_serializer_context*)serializer->context;
1315 raptor_xml_writer* xml_writer;
1316 raptor_uri *base_uri=serializer->base_uri;
1317 raptor_uri* base_uri_copy=NULL;
1318 raptor_xml_element* rss_items_predicate;
1319 int i;
1320 raptor_qname *rdf_Seq_qname;
1321 raptor_xml_element *rdf_Seq_element;
1322
1323 if(!raptor_sequence_size(rss_serializer->items))
1324 return;
1325
1326 xml_writer=rss_serializer->xml_writer;
1327
1328 rdf_Seq_qname=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1329 rss_serializer->default_nspace,
1330 (const unsigned char*)"Seq",
1331 NULL);
1332
1333 base_uri_copy=base_uri ? raptor_uri_copy_v2(rss_serializer->world, base_uri) : NULL;
1334 rdf_Seq_element=raptor_new_xml_element(rdf_Seq_qname, NULL, base_uri_copy);
1335
1336 /* make the <rss:items><rdf:Seq><rdf:li /> .... </rdf:Seq></rss:items> */
1337
1338 base_uri_copy=base_uri ? raptor_uri_copy_v2(rss_serializer->world, base_uri) : NULL;
1339 rss_items_predicate=raptor_new_xml_element(raptor_qname_copy(serializer->world->rss_fields_info_qnames[RAPTOR_RSS_FIELD_ITEMS]), NULL, base_uri_copy);
1340
1341 raptor_xml_writer_start_element(xml_writer, rss_items_predicate);
1342
1343 raptor_xml_writer_start_element(xml_writer, rdf_Seq_element);
1344
1345 for(i=0; i < raptor_sequence_size(rss_serializer->items); i++) {
1346 raptor_rss_item* item_item=(raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i);
1347 raptor_qname *rdf_li_qname;
1348 raptor_xml_element *rdf_li_element;
1349 raptor_qname **attrs;
1350 unsigned char* ruri_string;
1351
1352 rdf_li_qname=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1353 rss_serializer->default_nspace,
1354 (const unsigned char*)"li",
1355 NULL);
1356 base_uri_copy=base_uri ? raptor_uri_copy_v2(rss_serializer->world, base_uri) : NULL;
1357 rdf_li_element=raptor_new_xml_element(rdf_li_qname, NULL, base_uri_copy);
1358 attrs=(raptor_qname **)RAPTOR_CALLOC(qnamearray, 1, sizeof(raptor_qname*));
1359 ruri_string=raptor_uri_to_relative_uri_string_v2(rss_serializer->world, base_uri, item_item->uri);
1360 attrs[0]=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1361 rss_serializer->default_nspace,
1362 (const unsigned char*)"resource",
1363 ruri_string);
1364 raptor_free_memory(ruri_string);
1365 raptor_xml_element_set_attributes(rdf_li_element, attrs, 1);
1366
1367 raptor_xml_writer_empty_element(xml_writer, rdf_li_element);
1368
1369 raptor_xml_writer_newline(xml_writer);
1370
1371 raptor_free_xml_element(rdf_li_element);
1372 }
1373
1374 raptor_xml_writer_end_element(xml_writer, rdf_Seq_element);
1375
1376 raptor_free_xml_element(rdf_Seq_element);
1377
1378 raptor_xml_writer_end_element(xml_writer, rss_items_predicate);
1379
1380 raptor_free_xml_element(rss_items_predicate);
1381 }
1382
1383
1384 /* emit a block of RDF/XML depending on the rssTriples feature mode */
1385 static void
raptor_rss10_emit_rdfxml_item_triples(raptor_serializer * serializer,raptor_rss_item * item)1386 raptor_rss10_emit_rdfxml_item_triples(raptor_serializer *serializer,
1387 raptor_rss_item *item)
1388 {
1389 raptor_rss10_serializer_context *rss_serializer=(raptor_rss10_serializer_context*)serializer->context;
1390 raptor_xml_writer* xml_writer;
1391 raptor_qname* root_qname=NULL;
1392 raptor_xml_element* root_element=NULL;
1393 raptor_serializer* ser=NULL;
1394 raptor_uri* base_uri=NULL;
1395 int t_max_count=raptor_sequence_size(item->triples);
1396 int t_count;
1397 int t;
1398 int is_atom;
1399
1400 if(rss_serializer->rss_triples_mode == 0 || !item->triples)
1401 return;
1402
1403 xml_writer=rss_serializer->xml_writer;
1404 is_atom=rss_serializer->is_atom;
1405
1406 /* can only use atom-triples with atom serializer */
1407 if(rss_serializer->rss_triples_mode == 2 && !is_atom)
1408 return;
1409
1410 /* can only use rdf-xml with rss-1.0 serializer */
1411 if(rss_serializer->rss_triples_mode == 1 && is_atom)
1412 return;
1413
1414 t_count=0;
1415 for(t=0; t < t_max_count; t++) {
1416 if(raptor_sequence_get_at(item->triples, t))
1417 t_count++;
1418 }
1419 if(!t_count)
1420 return;
1421
1422 RAPTOR_DEBUG2("Serializing %d triples\n", t_count);
1423
1424 if(is_atom) {
1425 raptor_namespace* at_nspace=rss_serializer->nspaces[ATOMTRIPLES_NS];
1426
1427 /* atom:md with no attribute */
1428 root_qname=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1429 at_nspace,
1430 (const unsigned char*)"md",
1431 NULL);
1432 if(!root_qname)
1433 goto oom;
1434
1435 base_uri=serializer->base_uri;
1436 if(base_uri)
1437 base_uri=raptor_uri_copy_v2(rss_serializer->world, base_uri);
1438
1439 /* after this root_element owns root_qname and (this copy of) base_uri */
1440 root_element=raptor_new_xml_element(root_qname, NULL, base_uri);
1441 if(!root_element) {
1442 if(base_uri)
1443 raptor_free_uri_v2(rss_serializer->world, base_uri);
1444 raptor_free_qname(root_qname); root_qname=NULL;
1445 goto oom;
1446 }
1447 root_qname=NULL;
1448
1449 raptor_xml_writer_start_element(xml_writer, root_element);
1450 }
1451
1452 ser=raptor_new_serializer_v2(rss_serializer->world, "rdfxml-abbrev");
1453 if(!ser)
1454 goto oom;
1455
1456 raptor_rdfxmla_serialize_set_xml_writer(ser, xml_writer,
1457 rss_serializer->nstack);
1458 raptor_rdfxmla_serialize_set_write_rdf_RDF(ser, 0);
1459 raptor_rdfxmla_serialize_set_single_node(ser, item->uri);
1460 if(rss_serializer->rss_triples_mode == 2) {
1461 /* raptor_rdfxmla_serialize_set_write_typed_nodes(ser, 0); */
1462 }
1463
1464 if(base_uri)
1465 base_uri=raptor_uri_copy_v2(rss_serializer->world, base_uri);
1466
1467 /* after this call, ser owns (this copy of) base_uri and does
1468 * NOT own serializer->iostream and will not destroy it
1469 * when raptor_free_serializer(ser) is called.
1470 */
1471 raptor_serialize_start_to_iostream(ser, base_uri, serializer->iostream);
1472
1473 for(t=0; t < t_max_count; t++) {
1474 raptor_statement_v2* s;
1475 s=(raptor_statement_v2*)raptor_sequence_get_at(item->triples, t);
1476 if(s)
1477 raptor_serialize_statement(ser, s->s);
1478 }
1479
1480 raptor_serialize_end(ser);
1481
1482 raptor_free_serializer(ser); ser=NULL;
1483
1484 if(is_atom)
1485 raptor_xml_writer_end_element(xml_writer, root_element);
1486
1487 oom:
1488 if(ser)
1489 raptor_free_serializer(ser);
1490 if(root_qname)
1491 raptor_free_qname(root_qname);
1492 if(root_element)
1493 raptor_free_xml_element(root_element);
1494 }
1495
1496
1497 /**
1498 * raptor_rss10_ensure_atom_field_zero_one:
1499 * @item: RSS item object
1500 * @f: ATOM field type
1501 *
1502 * INTERNAL - Check that the given item @field appears 0 or 1 times
1503 */
1504 static void
raptor_rss10_ensure_atom_field_zero_one(raptor_rss_item * item,raptor_rss_fields_type f)1505 raptor_rss10_ensure_atom_field_zero_one(raptor_rss_item* item,
1506 raptor_rss_fields_type f)
1507 {
1508 raptor_rss_field* field=item->fields[f];
1509 if(!field)
1510 return;
1511
1512 if(field->next) {
1513 /* more than 1 value so delete rest of values */
1514 raptor_rss_field* next=field->next;
1515 field->next=NULL;
1516
1517 do {
1518 field=next;
1519
1520 next=field->next;
1521 field->next=NULL;
1522 raptor_rss_field_free(field);
1523 } while(next);
1524 }
1525
1526 }
1527
1528
1529 /**
1530 * raptor_rss10_ensure_atom_feed_valid:
1531 * @rss_serializer: serializer object
1532 *
1533 * INTERNAL - Ensure the atom items have all the fields they need:
1534 * <id> & <title> & <updated>
1535 * plus:
1536 * <link rel='alternate' ...> OR <content>..
1537 *
1538 */
1539 static int
raptor_rss10_ensure_atom_feed_valid(raptor_rss10_serializer_context * rss_serializer)1540 raptor_rss10_ensure_atom_feed_valid(raptor_rss10_serializer_context *rss_serializer)
1541 {
1542 int is_atom;
1543 int i;
1544 raptor_rss_item* item;
1545 raptor_rss_model* rss_model;
1546 struct timeval tv;
1547 time_t now = 0;
1548
1549 #ifdef HAVE_GETTIMEOFDAY
1550 if(!gettimeofday(&tv, NULL))
1551 now = tv.tv_sec;
1552 #endif
1553
1554 is_atom=rss_serializer->is_atom;
1555 rss_model=&rss_serializer->model;
1556
1557 if(!is_atom)
1558 return 0;
1559
1560 item=rss_model->common[RAPTOR_RSS_CHANNEL];
1561 if(item) {
1562 int f;
1563
1564 /* atom:id is required */
1565 f=RAPTOR_RSS_FIELD_ATOM_ID;
1566 if(!item->fields[f]) {
1567 raptor_rss_field* field=raptor_rss_new_field(rss_serializer->world);
1568 field->uri=raptor_uri_copy_v2(rss_serializer->world, item->uri);
1569 raptor_rss_item_add_field(item, f, field);
1570 }
1571
1572 /* atom:updated is required */
1573 f=RAPTOR_RSS_FIELD_ATOM_UPDATED;
1574 if(!item->fields[f]) {
1575 raptor_rss_field* field=raptor_rss_new_field(rss_serializer->world);
1576 raptor_rss_set_date_field(field, now);
1577 raptor_rss_item_add_field(item, f, field);
1578 }
1579
1580 /* atom:content is forbidden in feed */
1581 f=RAPTOR_RSS_FIELD_ATOM_CONTENT;
1582 if(item->fields[f]) {
1583 raptor_rss_field_free(item->fields[f]);
1584 item->fields[f]=NULL;
1585 }
1586
1587 /* atom:summary is forbidden in feed */
1588 f=RAPTOR_RSS_FIELD_ATOM_SUMMARY;
1589 if(item->fields[f]) {
1590 raptor_rss_field_free(item->fields[f]);
1591 item->fields[f]=NULL;
1592 }
1593
1594 /* These fields can appear 0 or 1 times on a feed */
1595 raptor_rss10_ensure_atom_field_zero_one(item,
1596 RAPTOR_RSS_FIELD_ATOM_ICON);
1597 raptor_rss10_ensure_atom_field_zero_one(item,
1598 RAPTOR_RSS_FIELD_ATOM_LOGO);
1599 raptor_rss10_ensure_atom_field_zero_one(item,
1600 RAPTOR_RSS_FIELD_ATOM_RIGHTS);
1601 raptor_rss10_ensure_atom_field_zero_one(item,
1602 RAPTOR_RSS_FIELD_ATOM_SUBTITLE);
1603 }
1604
1605
1606 for(i=0; i < raptor_sequence_size(rss_serializer->items); i++) {
1607 item=(raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i);
1608
1609 /* atom:id - defaults to item URI */
1610 if(!item->fields[RAPTOR_RSS_FIELD_ATOM_ID]) {
1611 raptor_rss_field* field=raptor_rss_new_field(rss_serializer->world);
1612 field->uri=raptor_uri_copy_v2(rss_serializer->world, item->uri);
1613 raptor_rss_item_add_field(item, RAPTOR_RSS_FIELD_ATOM_ID, field);
1614 }
1615
1616 /* atom:title - defaults to "untitled" */
1617 if(!item->fields[RAPTOR_RSS_FIELD_ATOM_TITLE]) {
1618 raptor_rss_field* field=raptor_rss_new_field(rss_serializer->world);
1619 field->value=(unsigned char*)RAPTOR_MALLOC(cstring, 9);
1620 strncpy((char*)field->value, "untitled", 9);
1621 raptor_rss_item_add_field(item, RAPTOR_RSS_FIELD_ATOM_TITLE, field);
1622 }
1623
1624 /* atom:updated - defaults to now time */
1625 if(!item->fields[RAPTOR_RSS_FIELD_ATOM_UPDATED]) {
1626 raptor_rss_field* field=raptor_rss_new_field(rss_serializer->world);
1627 raptor_rss_set_date_field(field, now);
1628 raptor_rss_item_add_field(item, RAPTOR_RSS_FIELD_ATOM_UPDATED, field);
1629 }
1630
1631 /* enforce there is either an atom:content OR atom:link (rel=alternate)
1632 * by adding a link to {item URI} if missing
1633 */
1634 if(!item->fields[RAPTOR_RSS_FIELD_ATOM_CONTENT] &&
1635 !item->fields[RAPTOR_RSS_FIELD_ATOM_LINK]) {
1636 raptor_rss_field* field=raptor_rss_new_field(rss_serializer->world);
1637 field->uri=raptor_uri_copy_v2(rss_serializer->world, item->uri);
1638 raptor_rss_item_add_field(item, RAPTOR_RSS_FIELD_ATOM_LINK, field);
1639 }
1640
1641 /* These fields can appear 0 or 1 times on an entry */
1642 raptor_rss10_ensure_atom_field_zero_one(item,
1643 RAPTOR_RSS_FIELD_ATOM_PUBLISHED);
1644 raptor_rss10_ensure_atom_field_zero_one(item,
1645 RAPTOR_RSS_FIELD_ATOM_RIGHTS);
1646 raptor_rss10_ensure_atom_field_zero_one(item,
1647 RAPTOR_RSS_FIELD_ATOM_SOURCE);
1648 raptor_rss10_ensure_atom_field_zero_one(item,
1649 RAPTOR_RSS_FIELD_ATOM_SUMMARY);
1650 }
1651
1652 return 0;
1653 }
1654
1655
1656 static void
raptor_rss10_emit_item(raptor_serializer * serializer,raptor_rss_item * item,int item_type,int emit_container)1657 raptor_rss10_emit_item(raptor_serializer* serializer,
1658 raptor_rss_item *item, int item_type,
1659 int emit_container)
1660 {
1661 raptor_rss10_serializer_context *rss_serializer=(raptor_rss10_serializer_context*)serializer->context;
1662 raptor_xml_writer* xml_writer;
1663 raptor_rss_model* rss_model;
1664 raptor_uri *base_uri=serializer->base_uri;
1665 raptor_xml_element *element=NULL;
1666 raptor_qname **attrs=NULL;
1667 raptor_uri* base_uri_copy=NULL;
1668 int fi;
1669 int is_atom;
1670
1671 #ifdef RAPTOR_DEBUG
1672 if(!item) {
1673 RAPTOR_FATAL3("Tried to emit NULL item of type %d - %s\n", item_type,
1674 raptor_rss_items_info[item_type].name);
1675 }
1676 #endif
1677
1678 xml_writer=rss_serializer->xml_writer;
1679 is_atom=rss_serializer->is_atom;
1680 rss_model=&rss_serializer->model;
1681
1682 if(!item->fields_count) {
1683 int i;
1684 for(i=0; i < raptor_sequence_size(rss_serializer->enclosures); i++) {
1685 raptor_rss_item *enclosure_item;
1686 enclosure_item=(raptor_rss_item*)raptor_sequence_get_at(rss_serializer->enclosures, i);
1687 /* If the item and enclosure item have the same URI, move the
1688 * enclosure fields to the item. Assumed that they got conflated
1689 * previously such as when the enclosure url = the guid
1690 */
1691 if(enclosure_item->uri &&
1692 raptor_uri_equals_v2(rss_serializer->world, item->uri, enclosure_item->uri)) {
1693 int j;
1694 for (j=0; j < RAPTOR_RSS_FIELDS_SIZE;j++) {
1695 if (j != RAPTOR_RSS_RDF_ENCLOSURE_TYPE &&
1696 j != RAPTOR_RSS_RDF_ENCLOSURE_LENGTH &&
1697 j != RAPTOR_RSS_RDF_ENCLOSURE_URL) {
1698 item->fields[j]=enclosure_item->fields[j];
1699 enclosure_item->fields[j]=NULL;
1700 item->fields_count++;
1701 enclosure_item->fields_count--;
1702 }
1703 }
1704 break;
1705 }
1706 }
1707 }
1708
1709 if(!item->fields_count)
1710 return;
1711
1712 if(emit_container) {
1713 base_uri_copy=base_uri ? raptor_uri_copy_v2(rss_serializer->world, base_uri) : NULL;
1714 element=raptor_new_xml_element(raptor_qname_copy(serializer->world->rss_types_info_qnames[item->node_typei]), NULL, base_uri_copy);
1715 if(!is_atom && item->uri) {
1716 unsigned char* ruri_string;
1717 attrs=(raptor_qname **)RAPTOR_CALLOC(qnamearray, 1, sizeof(raptor_qname*));
1718 ruri_string=raptor_uri_to_relative_uri_string_v2(rss_serializer->world, base_uri, item->uri);
1719 attrs[0]=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1720 rss_serializer->default_nspace,
1721 (const unsigned char*)"about",
1722 ruri_string);
1723 raptor_free_memory(ruri_string);
1724 raptor_xml_element_set_attributes(element, attrs, 1);
1725 }
1726
1727 raptor_xml_writer_start_element(xml_writer, element);
1728 }
1729
1730
1731 for(fi = 0; fi < RAPTOR_RSS_FIELDS_SIZE; fi++) {
1732 raptor_rss_fields_type f = (raptor_rss_fields_type)fi;
1733 raptor_rss_field* field;
1734
1735 if(f == RAPTOR_RSS_FIELD_ITEMS)
1736 /* emitting the RSS items rdf:Seq block is done after this loop */
1737 continue;
1738
1739 if(!serializer->world->rss_fields_info_uris[f])
1740 continue;
1741
1742 if(f == RAPTOR_RSS_FIELD_ATOM_AUTHOR) {
1743 int typei;
1744
1745 if(!is_atom)
1746 continue;
1747
1748 if(item_type != RAPTOR_RSS_CHANNEL)
1749 continue;
1750
1751 typei=RAPTOR_ATOM_AUTHOR;
1752 if(!rss_model->common[typei]) {
1753 raptor_rss_item* author_item;
1754 raptor_identifier* identifier;
1755
1756 /* No atom author was present so make a new atom:author item
1757 * then either promote the string to an atom:name field OR
1758 * use "unknown"
1759 */
1760 author_item=raptor_rss_model_add_common(rss_model, (raptor_rss_type)typei);
1761 identifier=&(author_item->identifier);
1762
1763 author_item->node_type=&raptor_rss_items_info[typei];
1764 author_item->node_typei=typei;
1765 /* FIXME - uses _:author as bnode name - should make a new
1766 * genid for each author node. This is OK because there
1767 * is a check above that there is only 1 author per FEED.
1768 */
1769 identifier->id=(const unsigned char*)RAPTOR_MALLOC(cstring, 7);
1770 strncpy((char*)identifier->id, "author", 7);
1771
1772 identifier->type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
1773 identifier->uri_source=RAPTOR_URI_SOURCE_GENERATED;
1774
1775 /* Move atom:name author field, or create a dummy one */
1776 f=RAPTOR_RSS_FIELD_ATOM_NAME;
1777 if(item->fields[f]) {
1778 field=item->fields[f];
1779 item->fields[f]=NULL;
1780 } else {
1781 field=raptor_rss_new_field(serializer->world);
1782 field->value=(unsigned char*)RAPTOR_MALLOC(cstring, 8);
1783 strncpy((char*)field->value, "unknown", 8);
1784 }
1785 raptor_rss_item_add_field(author_item, RAPTOR_RSS_FIELD_ATOM_NAME, field);
1786
1787 /* Move atom author fields if found: atom:uri and atom:email
1788 * are only used inside Person constructs
1789 */
1790 f=RAPTOR_RSS_FIELD_ATOM_URI;
1791 if(item->fields[f]) {
1792 field=item->fields[f];
1793 raptor_rss_item_add_field(author_item, f, field);
1794 item->fields[f]=NULL;
1795 }
1796 f=RAPTOR_RSS_FIELD_ATOM_EMAIL;
1797 if(item->fields[f]) {
1798 field=item->fields[f];
1799 raptor_rss_item_add_field(author_item, f, field);
1800 item->fields[f]=NULL;
1801 }
1802 }
1803
1804 RAPTOR_DEBUG3("Emitting type %i - %s\n", typei,
1805 raptor_rss_items_info[typei].name);
1806 raptor_rss10_emit_item(serializer, rss_model->common[typei], typei,
1807 1);
1808 continue;
1809 }
1810
1811
1812 for (field=item->fields[f]; field; field=field->next) {
1813 raptor_xml_element* predicate;
1814
1815 base_uri_copy=base_uri ? raptor_uri_copy_v2(rss_serializer->world, base_uri) : NULL;
1816 predicate=raptor_new_xml_element(raptor_qname_copy(serializer->world->rss_fields_info_qnames[f]), NULL, base_uri_copy);
1817
1818 /* Use atom:summary in preference */
1819 if(is_atom && f == RAPTOR_RSS_FIELD_DESCRIPTION)
1820 continue;
1821
1822 if(is_atom && field->uri) {
1823 unsigned char* ruri_string;
1824 size_t len;
1825 raptor_uri* my_base_uri=base_uri;
1826
1827 if(f == RAPTOR_RSS_FIELD_ATOM_ID)
1828 my_base_uri=NULL;
1829
1830 ruri_string=raptor_uri_to_relative_counted_uri_string_v2(rss_serializer->world,
1831 my_base_uri,
1832 field->uri, &len);
1833
1834 if(f == RAPTOR_RSS_FIELD_ATOM_LINK &&
1835 !item->fields[RAPTOR_RSS_FIELD_ATOM_CONTENT]) {
1836 /* atom:link to URI and there is no atom:content */
1837 raptor_qname **predicate_attrs=NULL;
1838 predicate_attrs=(raptor_qname **)RAPTOR_CALLOC(qnamearray, 2,
1839 sizeof(raptor_qname*));
1840 predicate_attrs[0]=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1841 NULL,
1842 (const unsigned char*)"href",
1843 ruri_string);
1844 predicate_attrs[1]=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1845 NULL,
1846 (const unsigned char*)"rel",
1847 (const unsigned char*)"alternate");
1848 field->value=NULL;
1849 raptor_xml_element_set_attributes(predicate, predicate_attrs, 2);
1850 raptor_xml_writer_empty_element(xml_writer, predicate);
1851 } else if(f == RAPTOR_RSS_FIELD_ATOM_CONTENT) {
1852 /* <atom:content src="{uri value}" type="{type}" /> */
1853 raptor_qname **predicate_attrs=NULL;
1854 const unsigned char* content_type;
1855 raptor_rss_field* content_type_field;
1856
1857 /* get the type */
1858 content_type_field=item->fields[RAPTOR_RSS_FIELD_AT_CONTENT_TYPE];
1859 if(content_type_field && content_type_field->value)
1860 content_type=content_type_field->value;
1861 else
1862 /* FIXME - default content type */
1863 content_type=(const unsigned char*)"text/html";
1864
1865 predicate_attrs=(raptor_qname **)RAPTOR_CALLOC(qnamearray, 2,
1866 sizeof(raptor_qname*));
1867 predicate_attrs[0]=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1868 NULL,
1869 (const unsigned char*)"src",
1870 ruri_string);
1871 predicate_attrs[1]=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1872 NULL,
1873 (const unsigned char*)"type",
1874 (const unsigned char*)content_type);
1875 /* free at:contentType field - no need to emit it */
1876 if(content_type_field) {
1877 raptor_rss_field_free(content_type_field);
1878 item->fields[RAPTOR_RSS_FIELD_AT_CONTENT_TYPE]=NULL;
1879 }
1880
1881 field->value=NULL;
1882 raptor_xml_element_set_attributes(predicate, predicate_attrs, 2);
1883 raptor_xml_writer_empty_element(xml_writer, predicate);
1884 } else {
1885 raptor_xml_writer_start_element(xml_writer, predicate);
1886 raptor_xml_writer_cdata_counted(xml_writer, ruri_string, len);
1887 raptor_xml_writer_end_element(xml_writer, predicate);
1888 }
1889 raptor_free_memory(ruri_string);
1890
1891 } else if (field->uri) {
1892 raptor_uri* enclosure_uri=field->uri;
1893 raptor_rss_item *enclosure_item=NULL;
1894 int i;
1895 if (f == RAPTOR_RSS_FIELD_ENCLOSURE && item_type == RAPTOR_RSS_ITEM) {
1896 for(i=0; i < raptor_sequence_size(rss_serializer->enclosures); i++) {
1897 enclosure_item=(raptor_rss_item*)raptor_sequence_get_at(rss_serializer->enclosures, i);
1898 if(enclosure_item->uri && raptor_uri_equals_v2(rss_serializer->world, enclosure_uri, enclosure_item->uri))
1899 break;
1900 }
1901 if (enclosure_item) {
1902 int attr_count=0;
1903 unsigned char* ruri_string;
1904
1905 attrs=(raptor_qname **)RAPTOR_CALLOC(qnamearray, 3, sizeof(raptor_qname*));
1906 ruri_string=raptor_uri_to_relative_uri_string_v2(rss_serializer->world, base_uri, field->uri);
1907 attrs[attr_count]=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1908 rss_serializer->default_nspace,
1909 (const unsigned char*)"resource",
1910 ruri_string);
1911 raptor_free_memory(ruri_string);
1912 attr_count++;
1913 if (enclosure_item->fields[RAPTOR_RSS_RDF_ENCLOSURE_TYPE] && enclosure_item->fields[RAPTOR_RSS_RDF_ENCLOSURE_TYPE]->value) {
1914 attrs[attr_count]=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1915 rss_serializer->nspaces[RSS2_0_ENC_NS],
1916 (const unsigned char*)raptor_rss_fields_info[RAPTOR_RSS_RDF_ENCLOSURE_TYPE].name,
1917 (const unsigned char*)enclosure_item->fields[RAPTOR_RSS_RDF_ENCLOSURE_TYPE]->value);
1918 attr_count++;
1919 }
1920 if (enclosure_item->fields[RAPTOR_RSS_RDF_ENCLOSURE_LENGTH] && enclosure_item->fields[RAPTOR_RSS_RDF_ENCLOSURE_LENGTH]->value) {
1921 attrs[attr_count]=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1922 rss_serializer->nspaces[RSS2_0_ENC_NS],
1923 (const unsigned char*)raptor_rss_fields_info[RAPTOR_RSS_RDF_ENCLOSURE_LENGTH].name,
1924 (const unsigned char*)enclosure_item->fields[RAPTOR_RSS_RDF_ENCLOSURE_LENGTH]->value);
1925 attr_count++;
1926 }
1927 raptor_xml_element_set_attributes(predicate, attrs, attr_count);
1928 } else {
1929 RAPTOR_DEBUG2("Enclosure item with URI %s could not be found in list of enclosures\n", raptor_uri_as_string_v2(rss_serializer->world, enclosure_uri));
1930 }
1931 } else {
1932 unsigned char* ruri_string;
1933
1934 /* not an rss:item with an rss:enclosure field */
1935 attrs=(raptor_qname **)RAPTOR_CALLOC(qnamearray, 1, sizeof(raptor_qname*));
1936 ruri_string=raptor_uri_to_relative_uri_string_v2(rss_serializer->world, base_uri, field->uri);
1937 attrs[0]=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1938 rss_serializer->default_nspace,
1939 (const unsigned char*)"resource",
1940 ruri_string);
1941 raptor_free_memory(ruri_string);
1942 raptor_xml_element_set_attributes(predicate, attrs, 1);
1943 }
1944 raptor_xml_writer_empty_element(xml_writer, predicate);
1945 } else if(field->value) {
1946 /* not a URI, must be a literal */
1947 int is_xhtml_content=field->is_xml;
1948 int prefer_cdata=(!is_atom && f == RAPTOR_RSS_FIELD_CONTENT_ENCODED);
1949
1950 if(is_xhtml_content && !prefer_cdata) {
1951 raptor_qname **predicate_attrs=NULL;
1952 predicate_attrs=(raptor_qname **)RAPTOR_CALLOC(qnamearray, 1, sizeof(raptor_qname*));
1953 if(is_atom)
1954 predicate_attrs[0]=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1955 NULL,
1956 (const unsigned char*)"type",
1957 (const unsigned char*)"xhtml");
1958 else
1959 predicate_attrs[0]=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
1960 rss_serializer->default_nspace,
1961 (const unsigned char*)"parseType",
1962 (const unsigned char*)"Literal");
1963 raptor_xml_element_set_attributes(predicate, predicate_attrs, 1);
1964 }
1965
1966 raptor_xml_writer_start_element(xml_writer, predicate);
1967 if(is_xhtml_content) {
1968 if(prefer_cdata)
1969 raptor_xml_writer_raw_counted(xml_writer,
1970 (const unsigned char*)"<![CDATA[", 9);
1971 raptor_xml_writer_raw(xml_writer, (const unsigned char*)field->value);
1972 if(prefer_cdata)
1973 raptor_xml_writer_raw_counted(xml_writer,
1974 (const unsigned char*)"]]>", 3);
1975 } else
1976 raptor_xml_writer_cdata(xml_writer, (const unsigned char*)field->value);
1977 raptor_xml_writer_end_element(xml_writer, predicate);
1978 } else {
1979 RAPTOR_DEBUG3("Field %d - %s had no URI or literal value\n",
1980 f, raptor_rss_fields_info[f].name);
1981 }
1982 raptor_free_xml_element(predicate);
1983 }
1984 }
1985
1986
1987 if(item_type == RAPTOR_RSS_CHANNEL) {
1988 if(is_atom)
1989 raptor_rss10_emit_atom_feed(serializer, item);
1990
1991 if(!is_atom)
1992 raptor_rss10_emit_rss_items(serializer);
1993 }
1994
1995 /* Add an RDF/XML block with remaining triples if Atom */
1996 if(item->triples && raptor_sequence_size(item->triples))
1997 raptor_rss10_emit_rdfxml_item_triples(serializer, item);
1998
1999 if(emit_container) {
2000 raptor_xml_writer_end_element(xml_writer, element);
2001 raptor_free_xml_element(element);
2002 }
2003
2004 }
2005
2006
2007 /**
2008 * raptor_rss10_serialize_end:
2009 * @serializer: serializer object
2010 *
2011 * INTERNAL (raptor_serializer_factory API) - End a serializing
2012 *
2013 * Return value: non-0 on failure
2014 */
2015 static int
raptor_rss10_serialize_end(raptor_serializer * serializer)2016 raptor_rss10_serialize_end(raptor_serializer* serializer) {
2017 raptor_rss10_serializer_context *rss_serializer=(raptor_rss10_serializer_context*)serializer->context;
2018 raptor_rss_model* rss_model;
2019 int i;
2020 raptor_xml_writer* xml_writer;
2021 #ifdef RAPTOR_DEBUG
2022 int triple_count=0;
2023 #endif
2024 int is_atom;
2025 raptor_qname **attrs=NULL;
2026 int attrs_count=0;
2027 raptor_uri* entry_uri=NULL;
2028 raptor_rss_item* entry_item=NULL;
2029
2030 rss_model=&rss_serializer->model;
2031 is_atom=rss_serializer->is_atom;
2032
2033 raptor_rss10_build_items(rss_serializer);
2034
2035 raptor_rss10_move_leftover_statements(rss_serializer);
2036
2037 raptor_rss10_move_anonymous_statements(rss_serializer);
2038
2039 if(is_atom) {
2040 raptor_rss10_ensure_atom_feed_valid(rss_serializer);
2041
2042 raptor_rss10_remove_mapped_fields(rss_serializer);
2043
2044 if(serializer->feature_atom_entry_uri) {
2045 entry_uri=raptor_new_uri_v2(rss_serializer->world, serializer->feature_atom_entry_uri);
2046 for(i=0; i < raptor_sequence_size(rss_serializer->items); i++) {
2047 raptor_rss_item* item;
2048 item=(raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i);
2049 if(raptor_uri_equals_v2(rss_serializer->world, item->uri, entry_uri)) {
2050 entry_item=item;
2051 break;
2052 }
2053 }
2054 if(!entry_item) {
2055 RAPTOR_DEBUG2("Entry URI %s was not found in list of items\n",
2056 raptor_uri_as_string_v2(rss_serializer->world, entry_uri));
2057 raptor_free_uri_v2(rss_serializer->world, entry_uri);
2058 entry_uri=NULL;
2059 }
2060 }
2061
2062 }
2063
2064 #ifdef RAPTOR_DEBUG
2065 for(i=0; i < raptor_sequence_size(rss_serializer->triples); i++) {
2066 raptor_statement_v2* t=(raptor_statement_v2*)raptor_sequence_get_at(rss_serializer->triples, i);
2067 if(t) {
2068 fprintf(stderr, " %d: ", i);
2069 raptor_print_statement_v2(t, stderr);
2070 fputc('\n', stderr);
2071 triple_count++;
2072 }
2073 }
2074 RAPTOR_DEBUG2("Starting with %d stored triples\n", triple_count);
2075 #endif
2076
2077 if(!rss_model->common[RAPTOR_RSS_CHANNEL]) {
2078 raptor_serializer_error(serializer, "No RSS channel found");
2079 return 1;
2080 }
2081
2082
2083 if(rss_serializer->xml_writer)
2084 raptor_free_xml_writer(rss_serializer->xml_writer);
2085
2086 xml_writer=raptor_new_xml_writer_v2(rss_serializer->world,
2087 rss_serializer->nstack,
2088 serializer->iostream,
2089 NULL, NULL, /* errors */
2090 1);
2091 rss_serializer->xml_writer=xml_writer;
2092 raptor_xml_writer_set_feature(xml_writer,
2093 RAPTOR_FEATURE_WRITER_AUTO_INDENT, 1);
2094 raptor_xml_writer_set_feature(xml_writer,
2095 RAPTOR_FEATURE_WRITER_AUTO_EMPTY, 1);
2096
2097 raptor_rss10_build_xml_names(serializer, (is_atom && entry_uri));
2098
2099 if(serializer->base_uri && serializer->feature_write_base_uri) {
2100 const unsigned char* base_uri_string;
2101
2102 attrs=(raptor_qname **)RAPTOR_CALLOC(qnamearray, 1, sizeof(raptor_qname*));
2103
2104 base_uri_string=raptor_uri_as_string_v2(rss_serializer->world, serializer->base_uri);
2105 attrs[attrs_count++]=raptor_new_qname_from_namespace_local_name_v2(rss_serializer->world,
2106 rss_serializer->xml_nspace,
2107 (const unsigned char*)"base",
2108 base_uri_string);
2109 }
2110
2111 if(attrs_count)
2112 raptor_xml_element_set_attributes(rss_serializer->root_element, attrs,
2113 attrs_count);
2114 else
2115 raptor_xml_element_set_attributes(rss_serializer->root_element, NULL, 0);
2116
2117 raptor_xml_writer_start_element(xml_writer, rss_serializer->root_element);
2118
2119
2120 if(entry_item) {
2121 RAPTOR_DEBUG1("Emitting entry\n");
2122 raptor_rss10_emit_item(serializer, entry_item, RAPTOR_RSS_ITEM, 0);
2123 raptor_xml_writer_raw_counted(xml_writer, (const unsigned char*)"\n", 1);
2124 } else {
2125 i=RAPTOR_RSS_CHANNEL;
2126 RAPTOR_DEBUG3("Emitting type %i - %s\n", i, raptor_rss_items_info[i].name);
2127 raptor_rss10_emit_item(serializer, rss_model->common[i], i, !is_atom);
2128 raptor_xml_writer_raw_counted(xml_writer, (const unsigned char*)"\n", 1);
2129
2130 if(rss_model->items_count) {
2131 for(i=0; i < raptor_sequence_size(rss_serializer->items); i++) {
2132 raptor_rss_item* item=(raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i);
2133 raptor_rss10_emit_item(serializer, item, RAPTOR_RSS_ITEM, 1);
2134 raptor_xml_writer_raw_counted(xml_writer, (const unsigned char*)"\n", 1);
2135 }
2136
2137 }
2138
2139 for(i=RAPTOR_RSS_CHANNEL+1; i< RAPTOR_RSS_COMMON_SIZE; i++) {
2140 raptor_rss_item* item;
2141
2142 if(is_atom) {
2143 /* atom 1.0 only serializes rss:item (channel is done above) */
2144 if(i != RAPTOR_RSS_ITEM)
2145 continue;
2146 } else {
2147 /* rss 1.0 ignores atom:author for now - FIXME */
2148 if(i == RAPTOR_ATOM_AUTHOR)
2149 continue;
2150 }
2151
2152 for (item=rss_model->common[i]; item; item=item->next) {
2153 RAPTOR_DEBUG3("Emitting type %i - %s\n", i, raptor_rss_items_info[i].name);
2154 raptor_rss10_emit_item(serializer, item, i, 1);
2155 }
2156 }
2157 }
2158
2159
2160 raptor_xml_writer_end_element(xml_writer, rss_serializer->root_element);
2161
2162 raptor_free_xml_element(rss_serializer->root_element);
2163
2164 raptor_xml_writer_newline(xml_writer);
2165
2166 raptor_xml_writer_flush(xml_writer);
2167
2168 return 0;
2169 }
2170
2171
2172 /* add a namespace */
2173 static int
raptor_rss10_serialize_declare_namespace_from_namespace(raptor_serializer * serializer,raptor_namespace * nspace)2174 raptor_rss10_serialize_declare_namespace_from_namespace(raptor_serializer* serializer,
2175 raptor_namespace *nspace)
2176 {
2177 raptor_rss10_serializer_context* rss_serializer=(raptor_rss10_serializer_context*)serializer->context;
2178 int i;
2179
2180 for(i=0; i< raptor_sequence_size(rss_serializer->user_namespaces); i++) {
2181 raptor_namespace* ns;
2182 ns=(raptor_namespace*)raptor_sequence_get_at(rss_serializer->user_namespaces, i);
2183
2184 /* If prefix is already declared, ignore it */
2185 if(!ns->prefix && !nspace->prefix)
2186 return 1;
2187
2188 if(ns->prefix && nspace->prefix &&
2189 !strcmp((const char*)ns->prefix, (const char*)nspace->prefix))
2190 return 1;
2191
2192 if(ns->uri && nspace->uri &&
2193 raptor_uri_equals_v2(rss_serializer->world, ns->uri, nspace->uri))
2194 return 1;
2195 }
2196
2197 nspace=raptor_new_namespace_from_uri(rss_serializer->nstack,
2198 nspace->prefix, nspace->uri,
2199 0);
2200 if(!nspace)
2201 return 1;
2202
2203 raptor_sequence_push(rss_serializer->user_namespaces, nspace);
2204 return 0;
2205 }
2206
2207
2208 /* add a namespace */
2209 static int
raptor_rss10_serialize_declare_namespace(raptor_serializer * serializer,raptor_uri * uri,const unsigned char * prefix)2210 raptor_rss10_serialize_declare_namespace(raptor_serializer* serializer,
2211 raptor_uri *uri,
2212 const unsigned char *prefix)
2213 {
2214 raptor_rss10_serializer_context* rss_serializer=(raptor_rss10_serializer_context*)serializer->context;
2215 raptor_namespace *ns;
2216 int rc;
2217
2218 ns=raptor_new_namespace_from_uri(rss_serializer->nstack, prefix, uri, 0);
2219 rc=raptor_rss10_serialize_declare_namespace_from_namespace(serializer, ns);
2220 raptor_free_namespace(ns);
2221
2222 return rc;
2223 }
2224
2225
2226
2227 /**
2228 * raptor_rss10_serialize_finish_factory:
2229 * @factory: serializer factory
2230 *
2231 * INTERNAL (raptor_serializer_factory API) - finish the serializer factory
2232 */
2233 static void
raptor_rss10_serialize_finish_factory(raptor_serializer_factory * factory)2234 raptor_rss10_serialize_finish_factory(raptor_serializer_factory* factory)
2235 {
2236
2237 }
2238
2239
2240 static int
raptor_rss10_serializer_register_factory(raptor_serializer_factory * factory)2241 raptor_rss10_serializer_register_factory(raptor_serializer_factory *factory)
2242 {
2243 factory->context_length = sizeof(raptor_rss10_serializer_context);
2244
2245 factory->init = raptor_rss10_serialize_init;
2246 factory->terminate = raptor_rss10_serialize_terminate;
2247 factory->declare_namespace = raptor_rss10_serialize_declare_namespace;
2248 factory->declare_namespace_from_namespace = raptor_rss10_serialize_declare_namespace_from_namespace;
2249 factory->serialize_start = raptor_rss10_serialize_start;
2250 factory->serialize_statement = raptor_rss10_serialize_statement;
2251 factory->serialize_end = raptor_rss10_serialize_end;
2252 factory->finish_factory = raptor_rss10_serialize_finish_factory;
2253
2254 return 0;
2255 }
2256
2257
2258
2259 int
raptor_init_serializer_rss10(raptor_world * world)2260 raptor_init_serializer_rss10(raptor_world* world) {
2261 return raptor_serializer_register_factory(world,
2262 "rss-1.0", "RSS 1.0",
2263 NULL,
2264 NULL,
2265 (const unsigned char*)"http://purl.org/rss/1.0/spec",
2266 &raptor_rss10_serializer_register_factory);
2267 }
2268
2269 int
raptor_init_serializer_atom(raptor_world * world)2270 raptor_init_serializer_atom(raptor_world* world) {
2271 return raptor_serializer_register_factory(world,
2272 "atom", "Atom 1.0",
2273 "application/atom+xml",
2274 NULL,
2275 NULL,
2276 &raptor_rss10_serializer_register_factory);
2277 }
2278
2279