1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
4 */
5 /**
6 * \file record_conv.c
7 * \brief Record Conversions utility
8 */
9
10 #if HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #include <string.h>
15 #include <yaz/log.h>
16 #include <yaz/yaz-iconv.h>
17 #include <yaz/marcdisp.h>
18 #include <yaz/record_conv.h>
19 #include <yaz/wrbuf.h>
20 #include <yaz/xmalloc.h>
21 #include <yaz/nmem.h>
22 #include <yaz/tpath.h>
23 #include <yaz/z-opac.h>
24 #include <yaz/xml_get.h>
25 #include <yaz/url.h>
26 #include <yaz/srw.h>
27 #include <yaz/timing.h>
28
29 #if YAZ_HAVE_XML2
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
32 #include <libxml/xinclude.h>
33 #include <libxml/xpath.h>
34 #include <libxml/xpathInternals.h>
35 #if YAZ_HAVE_XSLT
36 #include <libxslt/xsltutils.h>
37 #include <libxslt/transform.h>
38 #endif
39 #if YAZ_HAVE_EXSLT
40 #include <libexslt/exslt.h>
41 #endif
42
43 /** \brief The internal structure for yaz_record_conv_t */
44 struct yaz_record_conv_struct {
45 /** \brief memory for configuration */
46 NMEM nmem;
47
48 /** \brief conversion rules (allocated using NMEM) */
49 struct yaz_record_conv_rule *rules;
50
51 /** \brief pointer to last conversion rule pointer in chain */
52 struct yaz_record_conv_rule **rules_p;
53
54 /** \brief string buffer for error messages */
55 WRBUF wr_error;
56
57 /** \brief path for opening files */
58 char *path;
59 };
60
61 struct marc_info {
62 NMEM nmem;
63 const char *input_charset;
64 const char *output_charset;
65 int input_format_mode;
66 int output_format_mode;
67 const char *leader_spec;
68 };
69
/** \brief one step in the conversion chain */
struct yaz_record_conv_rule {
    /** \brief handlers (construct/convert/destroy) serving this step */
    struct yaz_record_conv_type *type;
    /** \brief opaque state produced by the type's construct handler */
    void *info;
    /** \brief following step, or null at the end of the chain */
    struct yaz_record_conv_rule *next;
};
76
77 /** \brief reset rules+configuration */
yaz_record_conv_reset(yaz_record_conv_t p)78 static void yaz_record_conv_reset(yaz_record_conv_t p)
79 {
80
81 struct yaz_record_conv_rule *r;
82 for (r = p->rules; r; r = r->next)
83 {
84 r->type->destroy(r->info);
85 }
86 wrbuf_rewind(p->wr_error);
87 nmem_reset(p->nmem);
88
89 p->rules = 0;
90
91 p->rules_p = &p->rules;
92 }
93
/* Releases a converter together with its rules, arena, error buffer and
   path. A null handle is accepted and ignored. */
void yaz_record_conv_destroy(yaz_record_conv_t p)
{
    if (!p)
        return;

    /* rules must be torn down while the arena holding them still exists */
    yaz_record_conv_reset(p);
    nmem_destroy(p->nmem);
    wrbuf_destroy(p->wr_error);

    xfree(p->path);
    xfree(p);
}
106
107 #if YAZ_HAVE_XSLT
108 struct xslt_info {
109 NMEM nmem;
110 xmlDocPtr xsp_doc;
111 const char **xsl_parms;
112 };
113
/**
 * \brief builds the state for an <xslt> configuration element
 * \param ptr configuration element to inspect
 * \param path search path used to locate the stylesheet; may be null
 * \param wr_error receives a message when the element is <xslt> but invalid
 * \returns a struct xslt_info pointer on success. Returns 0 with wr_error
 *          untouched when the element is not <xslt>, and 0 with wr_error
 *          filled in when the element is invalid.
 *
 * The stylesheet is parsed once and kept as a plain XML document. A copy is
 * compiled here only to verify that it is a usable XSLT stylesheet.
 */
static void *construct_xslt(const xmlNode *ptr,
                            const char *path, WRBUF wr_error)
{
    struct _xmlAttr *attr;
    const char *stylesheet = 0;
    struct xslt_info *info = 0;
    NMEM nmem = 0;
    int max_parms = 10;
    int no_parms = 0;
    char fullpath[1024];
    xmlDocPtr xsp_doc_copy;
    xsltStylesheetPtr xsp;

    if (strcmp((const char *) ptr->name, "xslt"))
        return 0;

    for (attr = ptr->properties; attr; attr = attr->next)
    {
        if (!xmlStrcmp(attr->name, BAD_CAST "stylesheet") &&
            attr->children && attr->children->type == XML_TEXT_NODE)
            stylesheet = (const char *) attr->children->content;
        else
        {
            wrbuf_printf(wr_error, "Bad attribute '%s'"
                         "Expected stylesheet.", attr->name);
            return 0;
        }
    }
    nmem = nmem_create();
    info = nmem_malloc(nmem, sizeof(*info));
    info->nmem = nmem;
    info->xsp_doc = 0;
    /* room for max_parms name/value pairs plus the terminating null */
    info->xsl_parms = nmem_malloc(
        nmem, (2 * max_parms + 1) * sizeof(*info->xsl_parms));

    for (ptr = ptr->children; ptr; ptr = ptr->next)
    {
        const char *name = 0;
        const char *value = 0;
        char *qvalue = 0;
        if (ptr->type != XML_ELEMENT_NODE)
            continue;
        if (strcmp((const char *) ptr->name, "param"))
        {
            wrbuf_printf(wr_error, "Bad element '%s'"
                         "Expected param.", ptr->name);
            nmem_destroy(nmem);
            return 0;
        }
        for (attr = ptr->properties; attr; attr = attr->next)
        {
            if (!xmlStrcmp(attr->name, BAD_CAST "name") &&
                attr->children && attr->children->type == XML_TEXT_NODE)
                name = (const char *) attr->children->content;
            else if (!xmlStrcmp(attr->name, BAD_CAST "value") &&
                     attr->children && attr->children->type == XML_TEXT_NODE)
                value = (const char *) attr->children->content;
            else
            {
                wrbuf_printf(wr_error, "Bad attribute '%s'"
                             "Expected name or value.", attr->name);
                nmem_destroy(nmem);
                return 0;
            }
        }
        if (!name || !value)
        {
            wrbuf_printf(wr_error, "Missing attributes name or value");
            nmem_destroy(nmem);
            return 0;
        }
        if (no_parms >= max_parms)
        {
            wrbuf_printf(wr_error, "Too many parameters given");
            nmem_destroy(nmem);
            return 0;
        }

        /* the value is wrapped in single quotes: two quotes plus the NUL */
        qvalue = nmem_malloc(nmem, strlen(value) + 3);
        strcpy(qvalue, "\'");
        strcat(qvalue, value);
        strcat(qvalue, "\'");

        info->xsl_parms[2 * no_parms] = nmem_strdup(nmem, name);
        info->xsl_parms[2 * no_parms + 1] = qvalue;
        no_parms++;
    }
    info->xsl_parms[2 * no_parms] = 0;

    if (!stylesheet)
    {
        wrbuf_printf(wr_error, "Element <xslt>: "
                     "attribute 'stylesheet' expected");
        nmem_destroy(nmem);
        return 0;
    }
    if (!yaz_filepath_resolve(stylesheet, path, 0, fullpath))
    {
        wrbuf_printf(wr_error, "Element <xslt stylesheet=\"%s\"/>:"
                     " could not locate stylesheet '%s'",
                     stylesheet, stylesheet);
        if (path)
            wrbuf_printf(wr_error, " with path '%s'", path);

        nmem_destroy(nmem);
        return 0;
    }
    info->xsp_doc = xmlParseFile(fullpath);
    if (!info->xsp_doc)
    {
        wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
                     " xml parse failed: %s", stylesheet, fullpath);
        if (path)
            wrbuf_printf(wr_error, " with path '%s'", path);
        nmem_destroy(nmem);
        return 0;
    }
    /* need to copy this before passing it to the processor. It will
       be encapsulated in the xsp and destroyed by xsltFreeStylesheet */
    xsp_doc_copy = xmlCopyDoc(info->xsp_doc, 1);
    xsp = xsltParseStylesheetDoc(xsp_doc_copy);
    if (!xsp)
    {
        wrbuf_printf(wr_error, "Element: <xslt stylesheet=\"%s\"/>:"
                     " xslt parse failed: %s", stylesheet, fullpath);
        if (path)
            wrbuf_printf(wr_error, " with path '%s'", path);
        wrbuf_printf(wr_error, " ("
#if YAZ_HAVE_EXSLT

                     "EXSLT enabled"
#else
                     "EXSLT not supported"
#endif
                     ")");
        /* a failed parse does not take ownership of the copy, so it has
           to be released here (xmlFreeDoc accepts null) */
        xmlFreeDoc(xsp_doc_copy);
        xmlFreeDoc(info->xsp_doc);
        nmem_destroy(info->nmem);
        return 0;
    }
    /* the compiled stylesheet was only needed for validation */
    xsltFreeStylesheet(xsp);
    return info;
}
258
/**
 * \brief applies the configured stylesheet to a record
 * \param vinfo struct xslt_info created by construct_xslt
 * \param record holds the XML input; replaced by the result on success
 * \param wr_error receives a message on failure
 * \returns 0 on success, -1 on failure
 */
static int convert_xslt(void *vinfo, WRBUF record, WRBUF wr_error)
{
    int ret = 0;
    struct xslt_info *info = vinfo;
    xmlDocPtr xsp_doc;
    xsltStylesheetPtr xsp;
    xmlDocPtr res;

    xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
                                   wrbuf_len(record));
    if (!doc)
    {
        wrbuf_printf(wr_error, "xmlParseMemory failed");
        return -1;
    }

    /* a fresh stylesheet is compiled from a copy of the stored document */
    xsp_doc = xmlCopyDoc(info->xsp_doc, 1);
    xsp = xsltParseStylesheetDoc(xsp_doc);
    res = xsp ? xsltApplyStylesheet(xsp, doc, info->xsl_parms) : 0;
    if (res)
    {
        xmlChar *out_buf = 0;
        int out_len = 0;

#if HAVE_XSLTSAVERESULTTOSTRING
        xsltSaveResultToString(&out_buf, &out_len, res, xsp);
#else
        xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1);
#endif
        if (!out_buf)
        {
            wrbuf_printf(wr_error,
                         "xsltSaveResultToString failed");
            ret = -1;
        }
        else
        {
            wrbuf_rewind(record);
            wrbuf_write(record, (const char *) out_buf, out_len);

            xmlFree(out_buf);
        }
        xmlFreeDoc(res);
    }
    else
    {
        wrbuf_printf(wr_error, "xsltApplyStylesheet failed");
        ret = -1;
    }
    xmlFreeDoc(doc);
    if (xsp)
        xsltFreeStylesheet(xsp); /* frees xsp_doc too */
    else
        xmlFreeDoc(xsp_doc); /* copy was never adopted by a stylesheet */
    return ret;
}
311
destroy_xslt(void * vinfo)312 static void destroy_xslt(void *vinfo)
313 {
314 struct xslt_info *info = vinfo;
315
316 if (info)
317 {
318 xmlFreeDoc(info->xsp_doc);
319 nmem_destroy(info->nmem);
320 }
321 }
322
323 /* YAZ_HAVE_XSLT */
324 #endif
325
326 struct select_info {
327 NMEM nmem;
328 char *xpath_expr;
329 };
330
/* Builds the state for a <select> element. Returns 0 without touching
   wr_error when the element has another name, and 0 with a message in
   wr_error when yaz_xml_get_prop reports an attribute other than path. */
static void *construct_select(const xmlNode *ptr,
                              const char *path, WRBUF wr_error)
{
    NMEM nmem;
    struct select_info *sel;
    const char *bad_attr;
    const char *expr = 0;

    if (strcmp((const char *) ptr->name, "select") != 0)
        return 0;

    nmem = nmem_create();
    sel = nmem_malloc(nmem, sizeof(*sel));
    sel->nmem = nmem;
    sel->xpath_expr = 0;

    bad_attr = yaz_xml_get_prop(ptr, "path%s", &expr);
    if (bad_attr)
    {
        wrbuf_printf(wr_error, "Bad attribute '%s'"
                     "Expected xpath.", bad_attr);
        nmem_destroy(nmem);
        return 0;
    }
    if (expr)
        sel->xpath_expr = nmem_strdup(nmem, expr);
    return sel;
}
358
/**
 * \brief replaces a record by the text selected with the rule's XPath
 * \param vinfo struct select_info created by construct_select
 * \param record XML input; overwritten with the concatenated text nodes of
 *        the matches when at least one node matches
 * \param wr_error receives a message when the record cannot be parsed
 * \returns 0 on success (including no match), -1 when parsing fails
 *
 * For element matches the text children are collected; for other matches
 * the node itself and its following siblings are scanned for text nodes.
 */
static int convert_select(void *vinfo, WRBUF record, WRBUF wr_error)
{
    struct select_info *info = vinfo;
    xmlXPathContextPtr xpathCtx;

    xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
                                   wrbuf_len(record));
    if (!doc)
    {
        wrbuf_printf(wr_error, "xmlParseMemory failed");
        return -1;
    }

    xpathCtx = xmlXPathNewContext(doc);
    if (xpathCtx)
    {
        if (info->xpath_expr)
        {
            xmlXPathObjectPtr xpathObj =
                xmlXPathEvalExpression((const xmlChar *) info->xpath_expr,
                                       xpathCtx);
            if (xpathObj)
            {
                xmlNodeSetPtr nodes = xpathObj->nodesetval;
                if (nodes)
                {
                    int i;
                    /* the record is kept as is when nothing matches */
                    if (nodes->nodeNr > 0)
                        wrbuf_rewind(record);
                    for (i = 0; i < nodes->nodeNr; i++)
                    {
                        xmlNode *ptr = nodes->nodeTab[i];
                        if (ptr->type == XML_ELEMENT_NODE)
                            ptr = ptr->children;
                        for (; ptr; ptr = ptr->next)
                            if (ptr->type == XML_TEXT_NODE)
                                wrbuf_puts(record,
                                           (const char *) ptr->content);
                    }
                }
                xmlXPathFreeObject(xpathObj);
            }
        }
        /* released even when no expression is configured */
        xmlXPathFreeContext(xpathCtx);
    }
    xmlFreeDoc(doc);
    return 0;
}
405
destroy_select(void * vinfo)406 static void destroy_select(void *vinfo)
407 {
408 struct select_info *info = vinfo;
409
410 if (info)
411 nmem_destroy(info->nmem);
412 }
413
414
/* Accepts a <solrmarc> element. The rule needs no state, so wr_error is
   returned merely as a non-null marker; any other element yields 0. */
static void *construct_solrmarc(const xmlNode *ptr,
                                const char *path, WRBUF wr_error)
{
    int is_solrmarc = !strcmp((const char *) ptr->name, "solrmarc");

    if (!is_solrmarc)
        return 0;
    return wr_error; /* any non-null ptr will do; we don't use it later*/
}
422
/**
 * \brief decodes '#NN;' escapes in a record
 * \param info unused
 * \param record input; overwritten with the decoded bytes
 * \param wr_error unused
 * \returns always 0
 *
 * A '#' followed by two characters accepted by atoi_n_check (presumably
 * two digits - confirm against its definition) and a ';' is replaced by
 * the single character whose code atoi_n_check stored. Everything else is
 * copied unchanged.
 */
static int convert_solrmarc(void *info, WRBUF record, WRBUF wr_error)
{
    WRBUF w = wrbuf_alloc();
    const char *buf = wrbuf_buf(record);
    size_t i, sz = wrbuf_len(record);
    for (i = 0; i < sz; i++)
    {
        int ch;
        /* "i + 3 < sz" rather than "i < sz - 3": the subtraction wraps
           around for records shorter than three bytes */
        if (buf[i] == '#' && i + 3 < sz && buf[i+3] == ';'
            && atoi_n_check(buf+i+1, 2, &ch))
            i += 3;
        else
            ch = buf[i];
        wrbuf_putc(w, ch);
    }
    wrbuf_rewind(record);
    wrbuf_write(record, wrbuf_buf(w), wrbuf_len(w));
    wrbuf_destroy(w);
    return 0;
}
443
/* Nothing to release: a <solrmarc> rule carries no state of its own. */
static void destroy_solrmarc(void *info)
{
    (void) info;
}
447
/* Builds the state for a <marc> element from its attributes inputformat,
   outputformat, inputcharset, outputcharset and leaderspec.
   Returns 0 without touching wr_error when the element has another name,
   and 0 with a message in wr_error when the attributes are not usable.
   On success the returned struct marc_info owns copies of its strings. */
static void *construct_marc(const xmlNode *ptr,
                            const char *path, WRBUF wr_error)
{
    /* recognised outputformat values; utf8_fallback marks the formats for
       which a missing output charset defaults to utf-8 when an input
       charset is known */
    static const struct {
        const char *name;
        int mode;
        int utf8_fallback;
    } out_formats[] = {
        { "line", YAZ_MARC_LINE, 0 },
        { "marcxml", YAZ_MARC_MARCXML, 1 },
        { "turbomarc", YAZ_MARC_TURBOMARC, 1 },
        { "marc", YAZ_MARC_ISO2709, 0 },
        { "marcxchange", YAZ_MARC_XCHANGE, 1 },
        { "json", YAZ_MARC_JSON, 1 }
    };
    const size_t n_out_formats = sizeof(out_formats) / sizeof(out_formats[0]);
    NMEM nmem;
    struct marc_info *mi;
    struct _xmlAttr *attr;
    const char *in_fmt = 0;
    const char *out_fmt = 0;
    size_t k;

    if (strcmp((const char *) ptr->name, "marc") != 0)
        return 0;

    nmem = nmem_create();
    mi = nmem_malloc(nmem, sizeof(*mi));
    mi->nmem = nmem;
    mi->input_charset = 0;
    mi->output_charset = 0;
    mi->input_format_mode = 0;
    mi->output_format_mode = 0;
    mi->leader_spec = 0;

    for (attr = ptr->properties; attr; attr = attr->next)
    {
        const int has_text =
            attr->children && attr->children->type == XML_TEXT_NODE;
        const char *text =
            has_text ? (const char *) attr->children->content : 0;

        if (has_text && !xmlStrcmp(attr->name, BAD_CAST "inputcharset"))
            mi->input_charset = text;
        else if (has_text && !xmlStrcmp(attr->name, BAD_CAST "outputcharset"))
            mi->output_charset = text;
        else if (has_text && !xmlStrcmp(attr->name, BAD_CAST "inputformat"))
            in_fmt = text;
        else if (has_text && !xmlStrcmp(attr->name, BAD_CAST "outputformat"))
            out_fmt = text;
        else if (has_text && !xmlStrcmp(attr->name, BAD_CAST "leaderspec"))
            mi->leader_spec = nmem_strdup(mi->nmem, text);
        else
        {
            wrbuf_printf(wr_error, "Element <marc>: expected attributes"
                         "'inputformat', 'inputcharset', 'outputformat' or"
                         " 'outputcharset', got attribute '%s'",
                         attr->name);
            nmem_destroy(mi->nmem);
            return 0;
        }
    }

    /* input format */
    if (!in_fmt)
    {
        wrbuf_printf(wr_error, "Element <marc>: "
                     "attribute 'inputformat' required");
        nmem_destroy(mi->nmem);
        return 0;
    }
    if (!strcmp(in_fmt, "marc"))
        mi->input_format_mode = YAZ_MARC_ISO2709;
    else if (!strcmp(in_fmt, "xml"))
    {
        mi->input_format_mode = YAZ_MARC_MARCXML;
        /* Libxml2 generates UTF-8 by default, so an unspecified input
           charset is taken to be UTF-8 when an output charset is given */
        if (!mi->input_charset && mi->output_charset)
            mi->input_charset = "utf-8";
    }
    else if (!strcmp(in_fmt, "json"))
        mi->input_format_mode = YAZ_MARC_JSON;
    else
    {
        wrbuf_printf(wr_error, "Element <marc inputformat='%s'>: "
                     " Unsupported input format"
                     " defined by attribute value",
                     in_fmt);
        nmem_destroy(mi->nmem);
        return 0;
    }

    /* output format */
    if (!out_fmt)
    {
        wrbuf_printf(wr_error,
                     "Element <marc>: attribute 'outputformat' required");
        nmem_destroy(mi->nmem);
        return 0;
    }
    for (k = 0; k < n_out_formats; k++)
        if (!strcmp(out_fmt, out_formats[k].name))
            break;
    if (k == n_out_formats)
    {
        wrbuf_printf(wr_error, "Element <marc outputformat='%s'>: "
                     " Unsupported output format"
                     " defined by attribute value",
                     out_fmt);
        nmem_destroy(mi->nmem);
        return 0;
    }
    mi->output_format_mode = out_formats[k].mode;
    if (out_formats[k].utf8_fallback &&
        mi->input_charset && !mi->output_charset)
        mi->output_charset = "utf-8";

    /* character sets: both must be known and convertible */
    if (!mi->input_charset || !mi->output_charset)
    {
        if (!mi->output_charset)
            wrbuf_printf(wr_error, "Element <marc>: "
                         "attribute 'outputcharset' missing");
        else
            wrbuf_printf(wr_error, "Element <marc>: "
                         "attribute 'inputcharset' missing");
        nmem_destroy(mi->nmem);
        return 0;
    }
    else
    {
        /* opened only to verify that the mapping is supported */
        yaz_iconv_t cd = yaz_iconv_open(mi->output_charset,
                                        mi->input_charset);
        if (!cd)
        {
            wrbuf_printf(wr_error,
                         "Element <marc inputcharset='%s' outputcharset='%s'>:"
                         " Unsupported character set mapping"
                         " defined by attribute values",
                         mi->input_charset, mi->output_charset);
            nmem_destroy(mi->nmem);
            return 0;
        }
        yaz_iconv_close(cd);
    }

    /* detach the charset names from the configuration document */
    mi->input_charset = nmem_strdup(mi->nmem, mi->input_charset);
    mi->output_charset = nmem_strdup(mi->nmem, mi->output_charset);
    return mi;
}
613
/**
 * \brief converts a MARC record between formats and character sets
 * \param info struct marc_info created by construct_marc
 * \param record input record; replaced by the converted record on success
 * \param wr_error receives a message on failure
 * \returns 0 on success, non-zero on failure
 *
 * ISO2709 and MARCXML/TurboMARC inputs are handled; any other configured
 * input format is reported as unsupported.
 */
static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
{
    struct marc_info *mi = info;
    const char *input_charset = mi->input_charset;
    int ret = 0;
    yaz_marc_t mt = yaz_marc_create();

    yaz_marc_xml(mt, mi->output_format_mode);
    if (mi->leader_spec)
        yaz_marc_leader_spec(mt, mi->leader_spec);

    if (mi->input_format_mode == YAZ_MARC_ISO2709)
    {
        int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
                                       wrbuf_len(record));
        if (sz > 0)
        {
            /* the record itself may override the configured charset */
            if (yaz_marc_check_marc21_coding(input_charset, wrbuf_buf(record),
                                             wrbuf_len(record)))
                input_charset = "utf-8";
        }
        else
        {
            /* report the failure instead of returning -1 silently */
            wrbuf_printf(wr_error, "yaz_marc_read_iso2709 failed");
            ret = -1;
        }
    }
    else if (mi->input_format_mode == YAZ_MARC_MARCXML ||
             mi->input_format_mode == YAZ_MARC_TURBOMARC)
    {
        xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
                                       wrbuf_len(record));
        if (!doc)
        {
            wrbuf_printf(wr_error, "xmlParseMemory failed");
            ret = -1;
        }
        else
        {
            ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
            if (ret)
                wrbuf_printf(wr_error, "yaz_marc_read_xml failed");
            xmlFreeDoc(doc);
        }
    }
    else
    {
        wrbuf_printf(wr_error, "unsupported input format");
        ret = -1;
    }
    if (ret == 0)
    {
        yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, input_charset);

        if (cd)
            yaz_marc_iconv(mt, cd);

        wrbuf_rewind(record);
        ret = yaz_marc_write_mode(mt, record);
        if (ret)
            wrbuf_printf(wr_error, "yaz_marc_write_mode failed");
        if (cd)
            yaz_iconv_close(cd);
    }
    yaz_marc_destroy(mt);
    return ret;
}
679
destroy_marc(void * info)680 static void destroy_marc(void *info)
681 {
682 struct marc_info *mi = info;
683 nmem_destroy(mi->nmem);
684 }
685
686
687
688 /* each info covers one lookup xpath. They all share the nmem and namespaces*/
689 #define RDF_LOOKUP_MAX_KEYS 20
690 #define RDF_LOOKUP_MAX_NAMESPACES 20
691 struct rdf_lookup_info {
692 NMEM nmem;
693 struct rdf_lookup_info *next;
694 int debug;
695 int timeout;
696 char *xpath;
697 char *server;
698 char *method;
699 char *keys[RDF_LOOKUP_MAX_KEYS];
700 char **namespacelist;
701 };
702
/**
 * \brief parses one <lookup> element of an <rdf-lookup> configuration
 * \param nmem arena for all allocations; owned (and freed) by the caller
 * \param ptr the <lookup> element
 * \param wr_error receives a message on failure
 * \param timeout value stored in the resulting info
 * \returns the new info, or 0 on failure with a message in wr_error
 *
 * server and method are left null when not configured; debug and
 * namespacelist are left for the caller to fill in.
 */
static struct rdf_lookup_info *construct_one_rdf_lookup(NMEM nmem,
                                                        const xmlNode *ptr,
                                                        WRBUF wr_error,
                                                        int timeout)
{
    struct _xmlAttr *attr;
    struct rdf_lookup_info *info = nmem_malloc(nmem, sizeof(*info));
    int nkeys = 0;
    info->nmem = nmem;
    info->next = 0;
    info->xpath = 0;
    info->server = 0;
    info->method = 0;
    info->debug = 0;
    info->timeout = timeout;
    info->namespacelist = 0;
    /* the key list must be terminated even when no <key> is given */
    info->keys[0] = 0;
    for (attr = ptr->properties; attr; attr = attr->next)
    {
        if (!xmlStrcmp(attr->name, BAD_CAST "xpath") &&
            attr->children && attr->children->type == XML_TEXT_NODE)
            info->xpath = nmem_strdup(nmem, (const char *) attr->children->content);
        else
        {
            wrbuf_printf(wr_error, "Bad attribute '%s'"
                         "Expected xpath.", attr->name);
            return 0;
        }
    }
    for (ptr = ptr->children; ptr; ptr = ptr->next)
    {
        if (ptr->type != XML_ELEMENT_NODE)
            continue;
        if (!xmlStrcmp(ptr->name, BAD_CAST "key"))
        {
            for (attr = ptr->properties; attr; attr = attr->next)
            {
                if (!xmlStrcmp(attr->name, BAD_CAST "field") &&
                    attr->children && attr->children->type == XML_TEXT_NODE)
                {
                    info->keys[nkeys++] =
                        nmem_strdup(nmem, (const char *) attr->children->content);
                    /* one slot is reserved for the terminating null */
                    if (nkeys >= RDF_LOOKUP_MAX_KEYS)
                    {
                        wrbuf_printf(wr_error, "Too many keys, max %d", RDF_LOOKUP_MAX_KEYS);
                        return 0;
                    }
                    info->keys[nkeys] = 0;
                }
                else
                {
                    wrbuf_printf(wr_error, "Bad attribute '%s'. "
                                 "Expected xpath.", attr->name);
                    return 0;
                }
            }
        }
        else if (!xmlStrcmp(ptr->name, BAD_CAST "server"))
        {
            for (attr = ptr->properties; attr; attr = attr->next)
            {
                if (!xmlStrcmp(attr->name, BAD_CAST "url") &&
                    attr->children && attr->children->type == XML_TEXT_NODE)
                {
                    info->server = nmem_strdup(nmem, (const char *) attr->children->content);
                }
                else if (!xmlStrcmp(attr->name, BAD_CAST "method") &&
                         attr->children && attr->children->type == XML_TEXT_NODE)
                {
                    info->method = nmem_strdup(nmem, (const char *) attr->children->content);
                }
                else
                {
                    wrbuf_printf(wr_error, "Bad attribute '%s'. "
                                 "Expected url or method.", attr->name);
                    return 0;
                }
            }
        }
        else
        {
            wrbuf_printf(wr_error, "Bad tag '%s'. "
                         "Expected 'key' or 'server'.", ptr->name);
            return 0;
        }
    }
    return info;
}
792
/**
 * \brief builds the chain of lookups for an <rdf-lookup> element
 * \param ptr configuration element to inspect
 * \param path unused
 * \param wr_error receives a message when the element is invalid
 * \returns head of a chain of struct rdf_lookup_info sharing one arena and
 *          one namespace list. Returns 0 with wr_error untouched when the
 *          element is not <rdf-lookup>, and 0 with a message when it is
 *          invalid.
 *
 * A <lookup> without <server> inherits the server of the previous lookup
 * (initially a built-in default); a missing method defaults to "GET".
 */
static void *construct_rdf_lookup(const xmlNode *ptr,
                                  const char *path, WRBUF wr_error)
{
    NMEM nmem = 0;
    struct rdf_lookup_info *info = 0;
    struct rdf_lookup_info **next = &info;
    const char *defserver = "http://id.loc.gov/authorities/names/label/%s";
    char ** namespaces = 0;
    int debug = 0;
    int nns = 0;
    int timeout = 0;
    struct _xmlAttr *attr;
    if (strcmp((const char *) ptr->name, "rdf-lookup"))
        return 0;
    yaz_log(YLOG_DEBUG, "Constructing rdf_lookup.");

    for (attr = ptr->properties; attr; attr = attr->next)
    {
        if (!xmlStrcmp(attr->name, BAD_CAST "debug") &&
            attr->children && attr->children->type == XML_TEXT_NODE)
        {
            debug = atoi((const char *) attr->children->content);
        }
        else if (!xmlStrcmp(attr->name, BAD_CAST "timeout") &&
                 attr->children && attr->children->type == XML_TEXT_NODE)
        {
            timeout = atoi((const char *) attr->children->content);
        }
        else
        {
            wrbuf_printf(wr_error, "Bad attribute '%s' for <rdf-lookup>. "
                         "Expected 'debug'", attr->name);
            return 0;
        }
    }
    nmem = nmem_create();
    /* two slots per namespace plus one for the terminating null */
    namespaces = nmem_malloc(nmem, (RDF_LOOKUP_MAX_NAMESPACES * 2 + 1) *
                             sizeof(char *));
    namespaces[0] = 0;

    for (ptr = ptr->children; ptr; ptr = ptr->next)
    {
        if (ptr->type != XML_ELEMENT_NODE)
            continue;
        if (!strcmp((const char *)ptr->name, "lookup"))
        {
            struct rdf_lookup_info *i = construct_one_rdf_lookup(nmem, ptr, wr_error, timeout);
            if (!i)
            {
                nmem_destroy(nmem);
                return 0; /* error already in wr_error */
            }
            i->namespacelist = namespaces;
            i->debug = debug;
            *next = i;
            next = &i->next;
            if (! i->server)
                i->server = nmem_strdup(nmem, defserver);
            else
                defserver = i->server;
            if (! i->method)
                i->method = nmem_strdup(nmem, "GET");
            yaz_log(YLOG_DEBUG, "lookup: x=%s k[0]:%s, %s %s",
                    i->xpath ? i->xpath : "",
                    i->keys[0] ? i->keys[0] : "",
                    i->method, i->server);
        }
        else if (!strcmp((const char *)ptr->name, "namespace"))
        {
            char * prefix = 0;
            char * href = 0;
            for (attr = ptr->properties; attr; attr = attr->next)
            {
                if (!xmlStrcmp(attr->name, BAD_CAST "prefix") &&
                    attr->children && attr->children->type == XML_TEXT_NODE)
                {
                    prefix = nmem_strdup(nmem, (const char *) attr->children->content);
                }
                else if (!xmlStrcmp(attr->name, BAD_CAST "href") &&
                         attr->children && attr->children->type == XML_TEXT_NODE)
                {
                    href = nmem_strdup(nmem, (const char *) attr->children->content);
                }
                else
                {
                    wrbuf_printf(wr_error, "Bad attribute '%s'. "
                                 "Expected 'prefix' or 'href'", attr->name);
                    nmem_destroy(nmem);
                    return 0;
                }
            }
            if (!prefix || !href)
            {
                wrbuf_printf(wr_error, "Bad namespace, need both 'prefix' and 'href'");
                nmem_destroy(nmem);
                return 0;
            }
            /* refuse to write past the end of the namespace array */
            if (nns >= 2 * RDF_LOOKUP_MAX_NAMESPACES)
            {
                wrbuf_printf(wr_error, "Too many namespaces, max %d",
                             RDF_LOOKUP_MAX_NAMESPACES);
                nmem_destroy(nmem);
                return 0;
            }
            namespaces[nns++] = prefix;
            namespaces[nns++] = href;
            namespaces[nns] = 0 ; /* signal end */
        }
        else
        {
            wrbuf_printf(wr_error, "Expected a <lookup> tag under rdf-lookup, not <%s>",
                         ptr->name);
            nmem_destroy(nmem);
            return 0;
        }
    }
    if (!info)
    {
        /* nothing to return that could own the arena, so release it and
           say why the element is rejected */
        wrbuf_printf(wr_error, "Element <rdf-lookup>: "
                     "at least one <lookup> element expected");
        nmem_destroy(nmem);
        return 0;
    }
    return info;
}
910
destroy_rdf_lookup(void * info)911 static void destroy_rdf_lookup(void *info)
912 {
913 struct rdf_lookup_info *inf = info;
914 yaz_log(YLOG_DEBUG, "Destroying rdf_lookup");
915 nmem_destroy(inf->nmem);
916 }
917
918 /* Little helper to add a XML comment */
rdf_lookup_debug_comment(xmlNode * n,WRBUF uri,Z_HTTP_Response * resp,struct rdf_lookup_info * info,yaz_timing_t tim,const char * msg,int yloglevel)919 static void rdf_lookup_debug_comment(xmlNode *n,
920 WRBUF uri,
921 Z_HTTP_Response *resp,
922 struct rdf_lookup_info *info,
923 yaz_timing_t tim,
924 const char *msg,
925 int yloglevel)
926 {
927 WRBUF com = wrbuf_alloc();
928 wrbuf_printf(com, " rdf-lookup %s ", info->method);
929 wrbuf_puts_replace_str(com, wrbuf_cstr(uri), "--", "%2D%2D");
930 wrbuf_printf(com, " took %g sec", yaz_timing_get_real(tim));
931 if (resp)
932 wrbuf_printf(com, " and resulted in %d", resp->code);
933 if (msg)
934 {
935 wrbuf_puts(com, " ");
936 wrbuf_puts(com, msg);
937 }
938 yaz_log(yloglevel, "%s", wrbuf_cstr(com) + 1); /* no leading space here */
939 wrbuf_puts(com, " "); /* lead+suffix space in XML comment */
940 if (info->debug)
941 {
942 xmlNodePtr comnode = xmlNewComment((const xmlChar *)wrbuf_cstr(com));
943 xmlAddNextSibling(n, comnode);
944 }
945 wrbuf_destroy(com);
946 }
947
rdf_lookup_node(xmlNode * n,xmlXPathContextPtr xpathCtx,struct rdf_lookup_info * info)948 static void rdf_lookup_node(xmlNode *n, xmlXPathContextPtr xpathCtx,
949 struct rdf_lookup_info *info)
950 {
951 int i;
952 int nkey;
953 int done = 0;
954 WRBUF uri = wrbuf_alloc();
955 xpathCtx->node = n;
956 for (nkey = 0; !done && info->keys[nkey]; nkey++)
957 {
958 xmlXPathObjectPtr xpo =
959 xmlXPathEvalExpression((const xmlChar *)info->keys[nkey], xpathCtx);
960 xmlNodeSetPtr fldNodes = xpo->nodesetval;
961 yaz_log(YLOG_DEBUG, "lookup_node: %d: %s", nkey, info->keys[nkey]);
962 if (fldNodes)
963 {
964 for (i = 0; !done && i < fldNodes->nodeNr; i++)
965 {
966 xmlNode *f = fldNodes->nodeTab[i];
967 if (f->type == XML_ELEMENT_NODE)
968 f = f->children;
969 for (; f && !done; f = f->next)
970 if (f->type == XML_TEXT_NODE)
971 {
972 yaz_timing_t tim = yaz_timing_create();
973 Z_HTTP_Response *resp;
974 char *keybuf = xmalloc(3*strlen((const char*) f->content)+1);
975 yaz_url_t url = yaz_url_create();
976 yaz_url_set_max_redirects(url, 0); /* we just want the first redirect */
977 if (info->timeout)
978 yaz_url_set_timeout(url, info->timeout, 0);
979 yaz_log(YLOG_DEBUG, "Found key '%s'", (const char*) f->content);
980 yaz_encode_uri_component(keybuf, (const char*) f->content);
981 wrbuf_rewind(uri);
982 wrbuf_printf(uri, info->server, keybuf);
983 xfree(keybuf);
984 yaz_log(YLOG_DEBUG, "Fetching '%s'", wrbuf_cstr(uri));
985 yaz_timing_start(tim);
986 /* no hdrs, no body */
987 resp = yaz_url_exec(url, wrbuf_cstr(uri),
988 info->method, 0, 0, 0);
989 yaz_timing_stop(tim);
990 if (resp)
991 {
992 yaz_log(YLOG_DEBUG, "resp code %d, headers %p", resp->code, resp->headers);
993 if ((resp->code == 302 || resp->code == 200)
994 && resp->headers)
995 {
996 const char *newuri = z_HTTP_header_lookup(resp->headers, "X-Uri");
997 if (newuri && *newuri)
998 {
999 xmlSetProp(n, (const xmlChar *)"rdf:about",
1000 (const xmlChar *)newuri);
1001 done = 1;
1002 rdf_lookup_debug_comment(f->parent, uri,
1003 resp, info, tim, newuri, YLOG_DEBUG);
1004 }
1005 else
1006 {
1007 yaz_log(YLOG_LOG, "rdf-lookup: Got no X-Uri for %s",
1008 wrbuf_cstr(uri));
1009 rdf_lookup_debug_comment(f->parent, uri, resp, info, tim,
1010 "No X-URI Header in response!", YLOG_LOG);
1011 }
1012 }
1013 else
1014 {
1015 rdf_lookup_debug_comment(f->parent, uri, resp,
1016 info, tim, NULL, YLOG_LOG);
1017 }
1018 if (!done)
1019 { /* something went wrong, dump headers and message */
1020 const char *err = yaz_url_get_error(url);
1021 Z_HTTP_Header *r = resp->headers;
1022 for ( ; r; r = r->next)
1023 yaz_log(YLOG_DEBUG, " %s: %s", r->name, r->value);
1024 if (resp->content_len > 0)
1025 {
1026 int i = 0;
1027 for (i = 0; i < resp->content_len; i++)
1028 {
1029 if (strchr(" \r\n", resp->content_buf[i]))
1030 i++;
1031 }
1032 if (i < resp->content_len)
1033 yaz_log(YLOG_LOG, "Response: %*.s",
1034 resp->content_len - i,
1035 resp->content_buf + i);
1036 }
1037 if (err && *err)
1038 yaz_log(YLOG_LOG, "Error: %s", err);
1039 }
1040 }
1041 else
1042 {
1043 rdf_lookup_debug_comment(f->parent, uri, resp, info, tim,
1044 "NO RESPONSE", YLOG_LOG);
1045 }
1046 yaz_timing_destroy(&tim);
1047 yaz_url_destroy(url);
1048 }
1049 }
1050 }
1051 xmlXPathFreeObject(xpo);
1052 }
1053 wrbuf_destroy(uri);
1054 }
1055
/* Runs every configured lookup against a record.
   The record is parsed as XML, the shared namespaces are registered, and
   for each lookup in the chain every node matched by its xpath is handed
   to rdf_lookup_node. The (possibly modified) document is then serialised
   back into record. Returns 0 on success and -1 when the record cannot be
   parsed, an xpath cannot be evaluated or serialisation fails. */
static int convert_rdf_lookup(void *rinfo, WRBUF record, WRBUF wr_error)
{
    int status = 0;
    struct rdf_lookup_info *head = rinfo;
    struct rdf_lookup_info *cur;
    xmlChar *dump = 0;
    int dump_len;
    xmlXPathContextPtr ctx;

    xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
                                   wrbuf_len(record));
    yaz_log(YLOG_DEBUG, "rdf_lookup convert starting");
    if (!doc)
    {
        wrbuf_printf(wr_error, "xmlParseMemory failed");
        return -1;
    }

    ctx = xmlXPathNewContext(doc);
    if (ctx)
    {
        char **ns;

        /* the list holds prefix/href pairs and ends with a null prefix */
        for (ns = head->namespacelist; *ns; ns += 2)
            xmlXPathRegisterNs(ctx, (const xmlChar *)ns[0],
                               (const xmlChar *)ns[1]);

        for (cur = head; cur; cur = cur->next)
        {
            xmlXPathObjectPtr found =
                xmlXPathEvalExpression((xmlChar *)(cur->xpath), ctx);
            yaz_log(YLOG_DEBUG, "xpath: %p %s", found, cur->xpath);
            if (!found)
            {
                wrbuf_printf(wr_error,
                             "Cannot compile X-Path expr: %s",
                             cur->xpath);
                status = -1;
                continue;
            }
            else
            {
                xmlNodeSetPtr set = found->nodesetval;
                yaz_log(YLOG_DEBUG, "nodeset: %p", set);
                if (set)
                {
                    int k;
                    for (k = 0; k < set->nodeNr; k++)
                    {
                        xmlNode *node = set->nodeTab[k];
                        yaz_log(YLOG_DEBUG, " node %d: t=%d n='%s' c='%s'", k, node->type,
                                (const char*) node->name, node->content);
                        rdf_lookup_node(node, ctx, cur);
                    }
                }
                xmlXPathFreeObject(found);
            }
        }
        xmlXPathFreeContext(ctx);
    }

    xmlDocDumpFormatMemory (doc, &dump, &dump_len, 1);
    if (dump)
    {
        wrbuf_rewind(record);
        wrbuf_write(record, (const char *) dump, dump_len);

        xmlFree(dump);
    }
    else
    {
        wrbuf_printf(wr_error,
                     "xmlDocDumpFormatMemory failed");
        status = -1;
    }
    xmlFreeDoc(doc);
    return status;
}
1134
/* Configures converter p from the element children of ptr.
   The built-in rule types are tried first, then the caller-supplied chain
   in types (which may be null). The first type whose construct handler
   either returns state or reports an error decides the outcome for an
   element. Existing rules are discarded beforehand.
   Returns 0 on success, -1 on failure with the message left in the
   converter's error buffer. */
int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr,
                                struct yaz_record_conv_type *types)
{
    struct yaz_record_conv_type builtin[5];
    size_t n = 0;
    const xmlNode *child;

    /* register marc */
    builtin[n].construct = construct_marc;
    builtin[n].convert = convert_marc;
    builtin[n].destroy = destroy_marc;
    n++;

    builtin[n - 1].next = &builtin[n];
    builtin[n].construct = construct_solrmarc;
    builtin[n].convert = convert_solrmarc;
    builtin[n].destroy = destroy_solrmarc;
    n++;

    builtin[n - 1].next = &builtin[n];
    builtin[n].construct = construct_select;
    builtin[n].convert = convert_select;
    builtin[n].destroy = destroy_select;
    n++;

#if YAZ_HAVE_XSLT
    /* register xslt */
    builtin[n - 1].next = &builtin[n];
    builtin[n].construct = construct_xslt;
    builtin[n].convert = convert_xslt;
    builtin[n].destroy = destroy_xslt;
    n++;

    /* register rdf_lookup */
    builtin[n - 1].next = &builtin[n];
    builtin[n].construct = construct_rdf_lookup;
    builtin[n].convert = convert_rdf_lookup;
    builtin[n].destroy = destroy_rdf_lookup;
    n++;
#endif

    /* caller-supplied types follow the built-in ones */
    builtin[n - 1].next = types;
    yaz_record_conv_reset(p);

    /* parsing element children */
    for (child = ptr->children; child; child = child->next)
    {
        struct yaz_record_conv_type *t = &builtin[0];
        struct yaz_record_conv_rule *rule;
        void *info = 0;

        if (child->type != XML_ELEMENT_NODE)
            continue;

        while (t)
        {
            wrbuf_rewind(p->wr_error);
            info = t->construct(child, p->path, p->wr_error);

            if (info || wrbuf_len(p->wr_error))
                break;
            /* info== 0 and no error reported , ie not handled by it */
            t = t->next;
        }
        if (!info)
        {
            if (wrbuf_len(p->wr_error) == 0)
                wrbuf_printf(p->wr_error, "Element <backend>: "
                             "unsupported element <%s>", child->name);
            return -1;
        }

        /* the rule keeps its own copy of the type since builtin[] is local */
        rule = (struct yaz_record_conv_rule *)
            nmem_malloc(p->nmem, sizeof(*rule));
        rule->next = 0;
        rule->info = info;
        rule->type = nmem_malloc(p->nmem, sizeof(*t));
        memcpy(rule->type, t, sizeof(*t));

        *p->rules_p = rule;
        p->rules_p = &rule->next;
    }
    return 0;
}
1207
/**
 * \brief Configures record conversion from XML using built-in types only
 *
 * Forwards to yaz_record_conv_configure_t with an empty chain of extra
 * conversion types and returns its result unchanged.
 */
int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr)
{
    struct yaz_record_conv_type *no_extra_types = 0;

    return yaz_record_conv_configure_t(p, ptr, no_extra_types);
}
1212
/**
 * \brief Runs a chain of conversion rules over one record
 * \param p record conversion handle (its error buffer is rewound first)
 * \param r first rule to apply; 0 means no rule is applied
 * \param input_record_buf input record bytes
 * \param input_record_len number of bytes in input_record_buf
 * \param output_record buffer that receives the input and is then handed
 *        to each rule's convert handler in turn
 * \returns 0 if every rule returned 0, otherwise the first non-zero
 *          value returned by a convert handler
 */
static int yaz_record_conv_record_rule(yaz_record_conv_t p,
                                       struct yaz_record_conv_rule *r,
                                       const char *input_record_buf,
                                       size_t input_record_len,
                                       WRBUF output_record)
{
    struct yaz_record_conv_rule *rule = r;
    int status = 0;

    wrbuf_rewind(p->wr_error);

    /* the output buffer doubles as the working record for all rules */
    wrbuf_write(output_record, input_record_buf, input_record_len);

    while (rule && status == 0)
    {
        status = rule->type->convert(rule->info, output_record, p->wr_error);
        rule = rule->next;
    }
    return status;
}
1228
/**
 * \brief Returns the output character set of a leading MARC rule
 * \param p record conversion handle
 * \returns the output_charset stored for the first rule when that rule is
 *          a MARC rule; 0 when there are no rules or the first rule is of
 *          another type
 */
const char *yaz_record_get_output_charset(yaz_record_conv_t p)
{
    const struct yaz_record_conv_rule *first = p->rules;
    const struct marc_info *mi;

    /* only the first rule is inspected, and only if it is a MARC rule */
    if (!first || first->type->construct != construct_marc)
        return 0;

    mi = first->info;
    return mi->output_charset;
}
1239
/**
 * \brief Converts an OPAC record using the configured rules
 * \param p record conversion handle; its first rule must be a MARC rule
 * \param input_record OPAC record to convert
 * \param output_record buffer receiving the converted record
 * \retval 0 success
 * \retval -1 the first rule is not a MARC rule (message in p->wr_error)
 * \returns otherwise, the result of applying the rules that follow the
 *          MARC rule to the decoded OPAC record
 *
 * The leading MARC rule is not run through its convert handler; only its
 * settings (input charset, output format mode, leader spec) are used to
 * decode the OPAC record. The remaining rules are then applied normally.
 */
int yaz_record_conv_opac_record(yaz_record_conv_t p,
                                Z_OPACRecord *input_record,
                                WRBUF output_record)
{
    struct yaz_record_conv_rule *r = p->rules;
    struct marc_info *mi;
    const char *input_charset;
    yaz_iconv_t cd;
    WRBUF res;
    yaz_marc_t mt;
    int ret;

    /* start every call with an empty error buffer, so that the message
       below is not appended to one left over from an earlier call */
    wrbuf_rewind(p->wr_error);

    if (!r || r->type->construct != construct_marc)
    {
        wrbuf_puts(p->wr_error, "Expecting MARC rule as first rule for OPAC");
        return -1; /* no marc rule so we can't do OPAC */
    }

    mi = r->info;
    input_charset = mi->input_charset;

    res = wrbuf_alloc();
    mt = yaz_marc_create();

    /* a non-zero check result overrides the configured input charset */
    if (yaz_opac_check_marc21_coding(input_charset, input_record))
        input_charset = "utf-8";
    cd = yaz_iconv_open("utf-8", input_charset);

    yaz_marc_xml(mt, mi->output_format_mode);

    if (mi->leader_spec)
        yaz_marc_leader_spec(mt, mi->leader_spec);

    yaz_marc_iconv(mt, cd);

    /* decode into a scratch buffer, then feed it to the remaining rules */
    yaz_opac_decode_wrbuf(mt, input_record, res);
    ret = yaz_record_conv_record_rule(p,
                                      r->next,
                                      wrbuf_buf(res), wrbuf_len(res),
                                      output_record);

    yaz_marc_destroy(mt);
    if (cd)
        yaz_iconv_close(cd);
    wrbuf_destroy(res);
    return ret;
}
1287
/**
 * \brief Converts a record by applying all configured rules
 * \param p record conversion handle
 * \param input_record_buf input record bytes
 * \param input_record_len number of bytes in input_record_buf
 * \param output_record buffer receiving the converted record
 * \returns the result of yaz_record_conv_record_rule started at the
 *          first configured rule
 */
int yaz_record_conv_record(yaz_record_conv_t p,
                           const char *input_record_buf,
                           size_t input_record_len,
                           WRBUF output_record)
{
    struct yaz_record_conv_rule *first_rule = p->rules;

    return yaz_record_conv_record_rule(p, first_rule,
                                       input_record_buf, input_record_len,
                                       output_record);
}
1297
/**
 * \brief Returns the current content of the handle's error buffer
 * \param p record conversion handle
 * \returns the error buffer of p as a C string
 */
const char *yaz_record_conv_get_error(yaz_record_conv_t p)
{
    const char *message = wrbuf_cstr(p->wr_error);

    return message;
}
1302
/**
 * \brief Sets the path that is handed to rule constructors
 * \param p record conversion handle
 * \param path new path; it is duplicated. 0 clears the stored path.
 *
 * Any previously stored path is released first.
 */
void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path)
{
    xfree(p->path);
    p->path = path ? xstrdup(path) : 0;
}
1310
yaz_record_conv_create()1311 yaz_record_conv_t yaz_record_conv_create()
1312 {
1313 yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p));
1314 p->nmem = nmem_create();
1315 p->wr_error = wrbuf_alloc();
1316 p->rules = 0;
1317 p->path = 0;
1318 return p;
1319 }
1320
1321 /* YAZ_HAVE_XML2 */
1322 #endif
1323
1324 /*
1325 * Local variables:
1326 * c-basic-offset: 4
1327 * c-file-style: "Stroustrup"
1328 * indent-tabs-mode: nil
1329 * End:
1330 * vim: shiftwidth=4 tabstop=8 expandtab
1331 */
1332
1333