1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) Index Data
3  * See the file LICENSE for details.
4  */
5 
6 /**
7  * \file marcdisp.c
8  * \brief Implements MARC conversion utilities
9  */
10 
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14 
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18 
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <yaz/marcdisp.h>
24 #include <yaz/wrbuf.h>
25 #include <yaz/yaz-util.h>
26 #include <yaz/nmem_xml.h>
27 #include <yaz/snprintf.h>
28 
29 #if YAZ_HAVE_XML2
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
32 #endif
33 
/** \brief progress of the optional collection wrapper around XML records */
enum yaz_collection_state {
    no_collection = 0,  /* no wrapper is written (default after create) */
    collection_first,   /* wrapper enabled; opening tag not written yet */
    collection_second   /* opening tag written; trailer must close it */
};
39 
/** \brief discriminator telling which union member of a yaz_marc_node is set */
enum YAZ_MARC_NODE_TYPE
{
    YAZ_MARC_DATAFIELD = 0,  /* node holds u.datafield */
    YAZ_MARC_CONTROLFIELD,   /* node holds u.controlfield */
    YAZ_MARC_COMMENT,        /* node holds u.comment */
    YAZ_MARC_LEADER          /* node holds u.leader */
};
48 
/** \brief payload of a data field node: tag, indicators and subfield list */
struct yaz_marc_datafield {
    /* field tag string */
    char *tag;
    /* indicator characters; the XML writer skips them when this is NULL */
    char *indicator;
    /* head of the singly linked subfield list, 0 while empty */
    struct yaz_marc_subfield *subfields;
};
55 
/** \brief payload of a control field node: a tag and its data string */
struct yaz_marc_controlfield {
    /* field tag string */
    char *tag;
    /* NUL-terminated field content */
    char *data;
};
61 
/** \brief payload of a comment: a single text string */
struct yaz_marc_comment {
    /* comment text */
    char *comment;
};
66 
67 /** \brief MARC node */
68 struct yaz_marc_node {
69     enum YAZ_MARC_NODE_TYPE which;
70     union {
71         struct yaz_marc_datafield datafield;
72         struct yaz_marc_controlfield controlfield;
73         char *comment;
74         char *leader;
75     } u;
76     struct yaz_marc_node *next;
77 };
78 
/** \brief one subfield of a data field

    \c code_data holds the subfield code immediately followed by the
    subfield data in one NUL-terminated string.
*/
struct yaz_marc_subfield {
    /* code + data, concatenated */
    char *code_data;
    /* following subfield, 0 for the last one */
    struct yaz_marc_subfield *next;
};
84 
85 /** \brief the internals of a yaz_marc_t handle */
86 struct yaz_marc_t_ {
87     WRBUF m_wr;
88     NMEM nmem;
89     int output_format;
90     int debug;
91     int write_using_libxml2;
92     enum yaz_collection_state enable_collection;
93     yaz_iconv_t iconv_cd;
94     char subfield_str[8];
95     char endline_str[8];
96     char *leader_spec;
97     struct yaz_marc_node *nodes;
98     struct yaz_marc_node **nodes_pp;
99     struct yaz_marc_subfield **subfield_pp;
100 };
101 
yaz_marc_create(void)102 yaz_marc_t yaz_marc_create(void)
103 {
104     yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
105     mt->output_format = YAZ_MARC_LINE;
106     mt->debug = 0;
107     mt->write_using_libxml2 = 0;
108     mt->enable_collection = no_collection;
109     mt->m_wr = wrbuf_alloc();
110     mt->iconv_cd = 0;
111     mt->leader_spec = 0;
112     strcpy(mt->subfield_str, " $");
113     strcpy(mt->endline_str, "\n");
114 
115     mt->nmem = nmem_create();
116     yaz_marc_reset(mt);
117     return mt;
118 }
119 
/** \brief releases a MARC handle and everything it owns
    \param mt handle; a NULL handle is ignored
*/
void yaz_marc_destroy(yaz_marc_t mt)
{
    if (mt)
    {
        nmem_destroy(mt->nmem);
        wrbuf_destroy(mt->m_wr);
        xfree(mt->leader_spec);
        xfree(mt);
    }
}
129 
yaz_marc_get_nmem(yaz_marc_t mt)130 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
131 {
132     return mt->nmem;
133 }
134 
/** \brief resets the handle's converter on behalf of an output buffer
    \param mt handle supplying the iconv descriptor
    \param wr WRBUF passed on to wrbuf_iconv_reset
*/
static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
{
    yaz_iconv_t cd = mt->iconv_cd;

    wrbuf_iconv_reset(wr, cd);
}
139 
/* Prototypes for static helpers that are used before they are defined. */

/* applies leader_spec to the leader buffer of the given size */
static int marc_exec_leader(const char *leader_spec,
                            char *leader,
                            size_t size);

#if YAZ_HAVE_XML2
/* Turbo MARC counterpart of yaz_marc_write_xml (libxml2 tree output) */
static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt,
                                        xmlNode **root_ptr,
                                        const char *ns,
                                        const char *format,
                                        const char *type);
#endif
148 
/** \brief appends a fresh node to the end of the handle's node list
    \param mt handle
    \returns the new node; only \c next is initialised, the caller
    must set \c which and the matching union member
*/
static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
{
    struct yaz_marc_node *fresh = (struct yaz_marc_node *)
        nmem_malloc(mt->nmem, sizeof(struct yaz_marc_node));

    fresh->next = 0;
    *mt->nodes_pp = fresh;         /* hook into the current tail slot */
    mt->nodes_pp = &fresh->next;   /* the new node's link is the tail now */
    return fresh;
}
158 
159 #if YAZ_HAVE_XML2
/** \brief adds a control field whose tag and data come from XML nodes
    \param mt handle
    \param ptr_tag XML node supplying the tag text
    \param ptr_data XML node supplying the field data

    Both strings are obtained with nmem_text_node_cdata using the
    handle's memory pool.
*/
void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                   const xmlNode *ptr_data)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);

    node->which = YAZ_MARC_CONTROLFIELD;
    node->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
    node->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
}
168 
/** \brief adds a control field with a ready-made tag and XML data
    \param mt handle
    \param tag tag string; the pointer is stored as-is, not copied
    \param ptr_data XML node supplying the field data
*/
void yaz_marc_add_controlfield_xml2(yaz_marc_t mt, char *tag,
                                    const xmlNode *ptr_data)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);

    node->which = YAZ_MARC_CONTROLFIELD;
    node->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
    node->u.controlfield.tag = tag;
}
177 
178 #endif
179 
180 
/** \brief appends a comment node holding a copy of the given text
    \param mt handle
    \param comment NUL-terminated text; duplicated into the handle's pool
*/
void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);

    node->which = YAZ_MARC_COMMENT;
    node->u.comment = nmem_strdup(mt->nmem, comment);
}
187 
/** \brief adds a comment node built from a printf-style format
    \param mt handle
    \param fmt printf-style format string
    \param ... arguments for \a fmt

    The text is formatted into a 200 byte local buffer, so longer
    messages are cut short.
*/
void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
{
    char text[200];
    va_list args;

    va_start(args, fmt);
    yaz_vsnprintf(text, sizeof(text) - 1, fmt, args);
    va_end(args);

    yaz_marc_add_comment(mt, text);
}
198 
/** \brief returns the debug level stored in the handle
    \param mt handle
    \returns value of the handle's debug setting
*/
int yaz_marc_get_debug(yaz_marc_t mt)
{
    int level = mt->debug;

    return level;
}
203 
/** \brief appends a leader node
    \param mt handle
    \param leader leader bytes
    \param leader_len number of bytes to copy from \a leader

    The copy is then passed, together with the handle's leader spec,
    to marc_exec_leader.
*/
void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
{
    char *copy = nmem_strdupn(mt->nmem, leader, leader_len);
    struct yaz_marc_node *node = yaz_marc_add_node(mt);

    node->which = YAZ_MARC_LEADER;
    node->u.leader = copy;
    marc_exec_leader(mt->leader_spec, copy, leader_len);
}
211 
/** \brief appends a control field node
    \param mt handle
    \param tag NUL-terminated tag; copied
    \param data field data
    \param data_len number of bytes to copy from \a data

    With debug enabled a comment node with a hex dump of at most the
    first 16 data bytes is added after the field.
*/
void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
                               const char *data, size_t data_len)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);

    node->which = YAZ_MARC_CONTROLFIELD;
    node->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
    node->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);

    if (mt->debug)
    {
        /* 13 + 16*3 + 3 characters at most, so 80 bytes are plenty */
        char msg[80];
        char *out = msg;
        size_t shown = data_len < 16 ? data_len : 16;
        size_t k;

        out += sprintf(out, "controlfield:");
        for (k = 0; k < shown; k++)
            out += sprintf(out, " %02X", data[k] & 0xff);
        if (shown < data_len)
            sprintf(out, " ..");    /* mark the dump as truncated */
        yaz_marc_add_comment(mt, msg);
    }
}
232 
/** \brief appends a data field node and makes it the current data field
    \param mt handle
    \param tag NUL-terminated tag; copied
    \param indicator indicator bytes
    \param indicator_len number of bytes to copy from \a indicator

    Subfields added afterwards are attached to this field.
*/
void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
                            const char *indicator, size_t indicator_len)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);
    struct yaz_marc_datafield *field = &node->u.datafield;

    node->which = YAZ_MARC_DATAFIELD;
    field->tag = nmem_strdup(mt->nmem, tag);
    field->indicator = nmem_strdupn(mt->nmem, indicator, indicator_len);
    field->subfields = 0;

    /* subsequent yaz_marc_add_subfield calls append to this list */
    mt->subfield_pp = &field->subfields;
}
246 
247 /** \brief adds a attribute value to the element name if it is plain chars
248 
249     If not, and if the attribute name is not null, it will append a
250     attribute element with the value if attribute name is null it will
251     return a non-zero value meaning it couldnt handle the value.
252 */
element_name_append_attribute_value(yaz_marc_t mt,WRBUF buffer,const char * attribute_name,char * code_data,size_t code_len)253 static int element_name_append_attribute_value(
254     yaz_marc_t mt, WRBUF buffer,
255     const char *attribute_name, char *code_data, size_t code_len)
256 {
257     /* TODO Map special codes to something possible for XML ELEMENT names */
258 
259     int encode = 0;
260     size_t index = 0;
261     int success = 0;
262     for (index = 0; index < code_len; index++)
263     {
264         if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
265               (code_data[index] >= 'a' && code_data[index] <= 'z') ||
266               (code_data[index] >= 'A' && code_data[index] <= 'Z')))
267             encode = 1;
268     }
269     /* Add as attribute */
270     if (encode && attribute_name)
271         wrbuf_printf(buffer, " %s=\"", attribute_name);
272 
273     if (!encode || attribute_name)
274         wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len);
275     else
276         success = -1;
277 
278     if (encode && attribute_name)
279         wrbuf_printf(buffer, "\""); /* return error if we couldn't handle it.*/
280     return success;
281 }
282 
283 #if YAZ_HAVE_XML2
/** \brief appends a data field whose tag comes from an XML node
    \param mt handle
    \param ptr_tag XML node supplying the tag text
    \param indicator NUL-terminated indicator string; copied in full
    \param indicator_len not used by this function

    The new field becomes the current data field for added subfields.
*/
void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                const char *indicator, size_t indicator_len)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);
    struct yaz_marc_datafield *field = &node->u.datafield;

    node->which = YAZ_MARC_DATAFIELD;
    field->tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
    field->indicator = nmem_strdup(mt->nmem, indicator);
    field->subfields = 0;

    /* subsequent yaz_marc_add_subfield calls append to this list */
    mt->subfield_pp = &field->subfields;
}
296 
/** \brief appends a data field using caller-provided strings
    \param mt handle
    \param tag_value tag string; the pointer is stored as-is, not copied
    \param indicators indicator string; stored as-is, not copied

    The new field becomes the current data field for added subfields.
*/
void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);
    struct yaz_marc_datafield *field = &node->u.datafield;

    node->which = YAZ_MARC_DATAFIELD;
    field->tag = tag_value;
    field->indicator = indicators;
    field->subfields = 0;

    /* subsequent yaz_marc_add_subfield calls append to this list */
    mt->subfield_pp = &field->subfields;
}
308 
yaz_marc_datafield_set_indicators(struct yaz_marc_node * n,char * indicator)309 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
310 {
311     n->u.datafield.indicator = indicator;
312 }
313 
314 #endif
315 
/** \brief appends a subfield to the current data field
    \param mt handle
    \param code_data subfield code followed by subfield data
    \param code_data_len number of bytes to copy from \a code_data

    With debug enabled a comment node with a hex dump of at most the
    first 16 bytes is added first. If there is no current data field
    the subfield itself is dropped.
*/
void yaz_marc_add_subfield(yaz_marc_t mt,
                           const char *code_data, size_t code_data_len)
{
    if (mt->debug)
    {
        /* 9 + 16*3 + 3 characters at most, so 80 bytes are plenty */
        char msg[80];
        char *out = msg;
        size_t shown = code_data_len < 16 ? code_data_len : 16;
        size_t k;

        out += sprintf(out, "subfield:");
        for (k = 0; k < shown; k++)
            out += sprintf(out, " %02X", code_data[k] & 0xff);
        if (shown < code_data_len)
            sprintf(out, " ..");    /* mark the dump as truncated */
        yaz_marc_add_comment(mt, msg);
    }

    if (mt->subfield_pp)
    {
        struct yaz_marc_subfield *sub = (struct yaz_marc_subfield *)
            nmem_malloc(mt->nmem, sizeof(struct yaz_marc_subfield));

        sub->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
        sub->next = 0;

        /* link it in and move the append position to its next pointer */
        *mt->subfield_pp = sub;
        mt->subfield_pp = &sub->next;
    }
}
343 
/** \brief replaces one leader byte if it is outside the range ' '..127
    \param mt handle; receives a comment node when a replacement happens
    \param leader leader buffer, modified in place
    \param offset position of the byte to check
    \param ch_default replacement character
*/
static void check_ascii(yaz_marc_t mt, char *leader, int offset,
                        int ch_default)
{
    char *slot = leader + offset;

    if (*slot >= ' ' && *slot <= 127)
        return;   /* acceptable as is */

    yaz_marc_cprintf(mt,
                     "Leader character at offset %d is non-ASCII. "
                     "Setting value to '%c'", offset, ch_default);
    *slot = ch_default;
}
355 
/** \brief validates a 24 byte leader, extracts its numbers and adds it
    \param mt handle
    \param leader_c leader; exactly 24 bytes are read
    \param indicator_length receives indicator length (offset 10)
    \param identifier_length receives identifier length (offset 11)
    \param base_address receives base address (offsets 12..16)
    \param length_data_entry receives length of data entry (offset 20)
    \param length_starting receives length of starting position (offset 21)
    \param length_implementation receives implementation length (offset 22)

    Works on a private copy of the leader. Every value that is not
    accepted is replaced by a default and reported as a comment node.
    The corrected copy is finally added as the leader node.
*/
void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
                         int *indicator_length,
                         int *identifier_length,
                         int *base_address,
                         int *length_data_entry,
                         int *length_starting,
                         int *length_implementation)
{
    char fixed[24];
    int pos;

    memcpy(fixed, leader_c, sizeof(fixed));

    /* offsets 5..7 fall back to 'a', offsets 8..9 to '#' */
    for (pos = 5; pos <= 9; pos++)
        check_ascii(mt, fixed, pos, pos <= 7 ? 'a' : '#');

    if (!atoi_n_check(fixed + 10, 1, indicator_length)
        || *indicator_length == 0)
    {
        yaz_marc_cprintf(mt, "Indicator length at offset 10 should"
                         " hold a number 1-9. Assuming 2");
        *indicator_length = 2;
        fixed[10] = '2';
    }

    if (!atoi_n_check(fixed + 11, 1, identifier_length)
        || *identifier_length == 0)
    {
        yaz_marc_cprintf(mt, "Identifier length at offset 11 should "
                         " hold a number 1-9. Assuming 2");
        *identifier_length = 2;
        fixed[11] = '2';
    }

    /* note: the leader bytes themselves are left alone in this case */
    if (!atoi_n_check(fixed + 12, 5, base_address))
    {
        yaz_marc_cprintf(mt, "Base address at offsets 12..16 should"
                         " hold a number. Assuming 0");
        *base_address = 0;
    }

    for (pos = 17; pos <= 19; pos++)
        check_ascii(mt, fixed, pos, '#');

    if (!atoi_n_check(fixed + 20, 1, length_data_entry)
        || *length_data_entry < 3)
    {
        yaz_marc_cprintf(mt, "Length data entry at offset 20 should"
                         " hold a number 3-9. Assuming 4");
        fixed[20] = '4';
        *length_data_entry = 4;
    }

    if (!atoi_n_check(fixed + 21, 1, length_starting)
        || *length_starting < 4)
    {
        yaz_marc_cprintf(mt, "Length starting at offset 21 should"
                         " hold a number 4-9. Assuming 5");
        fixed[21] = '5';
        *length_starting = 5;
    }

    if (!atoi_n_check(fixed + 22, 1, length_implementation))
    {
        yaz_marc_cprintf(mt, "Length implementation at offset 22 should"
                         " hold a number. Assuming 0");
        fixed[22] = '0';
        *length_implementation = 0;
    }

    check_ascii(mt, fixed, 23, '0');

    if (mt->debug)
    {
        yaz_marc_cprintf(mt, "Indicator length      %5d", *indicator_length);
        yaz_marc_cprintf(mt, "Identifier length     %5d", *identifier_length);
        yaz_marc_cprintf(mt, "Base address          %5d", *base_address);
        yaz_marc_cprintf(mt, "Length data entry     %5d", *length_data_entry);
        yaz_marc_cprintf(mt, "Length starting       %5d", *length_starting);
        yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
    }

    yaz_marc_add_leader(mt, fixed, sizeof(fixed));
}
431 
/** \brief sets the text written before each subfield in line format
    \param mt handle
    \param s NUL-terminated string; at most 7 bytes of it are kept
*/
void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
{
    const size_t last = sizeof(mt->subfield_str) - 1;

    strncpy(mt->subfield_str, s, last);
    mt->subfield_str[last] = '\0';   /* strncpy does not terminate on overflow */
}
437 
/** \brief sets the text written after each field in line format
    \param mt handle
    \param s NUL-terminated string; at most 7 bytes of it are kept
*/
void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
{
    const size_t last = sizeof(mt->endline_str) - 1;

    strncpy(mt->endline_str, s, last);
    mt->endline_str[last] = '\0';   /* strncpy does not terminate on overflow */
}
443 
444 /* try to guess how many bytes the identifier really is! */
cdata_one_character(yaz_marc_t mt,const char * buf)445 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
446 {
447     if (mt->iconv_cd)
448     {
449         size_t i;
450         for (i = 1; i<5; i++)
451         {
452             char outbuf[12];
453             size_t outbytesleft = sizeof(outbuf);
454             char *outp = outbuf;
455             const char *inp = buf;
456 
457             size_t inbytesleft = i;
458             size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
459                                  &outp, &outbytesleft);
460             yaz_iconv(mt->iconv_cd, 0, 0, &outp, &outbytesleft);
461             if (r != (size_t) (-1))
462                 return i;  /* got a complete sequence */
463         }
464         return 1; /* giving up */
465     }
466     else
467     {
468         int error = 0;
469         size_t no_read = 0;
470         (void) yaz_read_UTF8_char((const unsigned char *) buf, 4,
471                                   &no_read, &error);
472         if (error == 0 && no_read > 0)
473             return no_read;
474     }
475     return 1; /* we don't know */
476 }
477 
/** \brief public wrapper returning the byte size of the first character
    \param mt handle
    \param buf input bytes
    \returns result of cdata_one_character for the same arguments
*/
size_t yaz_marc_sizeof_char(yaz_marc_t mt, const char *buf)
{
    size_t width = cdata_one_character(mt, buf);

    return width;
}
482 
/** \brief discards the current record so a new one can be built
    \param mt handle

    Resets the memory pool and empties the node list; no data field
    is current afterwards.
*/
void yaz_marc_reset(yaz_marc_t mt)
{
    nmem_reset(mt->nmem);

    mt->subfield_pp = 0;          /* no data field to attach subfields to */
    mt->nodes = 0;
    mt->nodes_pp = &mt->nodes;    /* next node becomes the list head */
}
490 
/** \brief writes only the comment nodes of the record, one per line
    \param mt handle
    \param wr WRBUF output
    \retval 0 OK
    \retval -1 no leader present, or atoi_n_check rejected leader offset 11
*/
int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
{
    struct yaz_marc_node *cur;
    int identifier_length;

    /* locate the first leader node */
    cur = mt->nodes;
    while (cur && cur->which != YAZ_MARC_LEADER)
        cur = cur->next;
    if (!cur || !cur->u.leader)
        return -1;
    if (!atoi_n_check(cur->u.leader + 11, 1, &identifier_length))
        return -1;

    for (cur = mt->nodes; cur; cur = cur->next)
    {
        if (cur->which != YAZ_MARC_COMMENT)
            continue;
        wrbuf_iconv_write(wr, mt->iconv_cd,
                          cur->u.comment, strlen(cur->u.comment));
        wrbuf_puts(wr, "\n");
    }
    return 0;
}
524 
/** \brief determines how many bytes of a subfield make up its code
    \param mt handle
    \param data subfield code followed by data
    \param identifier_length identifier length taken from the leader
    \returns length of the code in bytes

    An identifier length above 2 is taken literally (minus the
    delimiter). For 2 (most MARCs) or less (probably an error) the code
    is one character, but since multibyte codes have been seen its
    real byte size is measured.
*/
static size_t get_subfield_len(yaz_marc_t mt, const char *data,
                               int identifier_length)
{
    if (identifier_length <= 2)
        return cdata_one_character(mt, data);
    return identifier_length - 1;
}
536 
/** \brief writes the record in line format
    \param mt handle
    \param wr WRBUF output
    \retval 0 OK
    \retval -1 no leader present, or atoi_n_check rejected leader offset 11

    Output per node:
    - leader: the leader followed by "\n"
    - control field: tag, a space, data, then the end-of-line string
    - data field: tag, a space, indicators, then for each subfield the
      subfield string, the code, a space and the data; finally the
      end-of-line string
    - comment: the text in parentheses followed by "\n"

    An extra "\n" terminates the record.
*/
int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
{
    struct yaz_marc_node *n;
    int identifier_length;
    const char *leader = 0;

    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
        {
            leader = n->u.leader;
            break;
        }

    if (!leader)
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;
        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            /* The indicator pointer can be NULL (the XML writer checks
               for that); passing NULL for %s is undefined, so write an
               empty indicator string instead. */
            wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
                         n->u.datafield.indicator ?
                         n->u.datafield.indicator : "");
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                size_t using_code_len = get_subfield_len(mt, s->code_data,
                                                         identifier_length);

                wrbuf_puts (wr, mt->subfield_str);
                wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
                                  using_code_len);
                wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
                wrbuf_iconv_puts(wr, mt->iconv_cd,
                                 s->code_data + using_code_len);
                marc_iconv_reset(mt, wr);
            }
            wrbuf_puts (wr, mt->endline_str);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_printf(wr, "%s", n->u.controlfield.tag);
            wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
            wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr);
            wrbuf_puts (wr, mt->endline_str);
            break;
        case YAZ_MARC_COMMENT:
            wrbuf_puts(wr, "(");
            wrbuf_iconv_write(wr, mt->iconv_cd,
                              n->u.comment, strlen(n->u.comment));
            marc_iconv_reset(mt, wr);
            wrbuf_puts(wr, ")\n");
            break;
        case YAZ_MARC_LEADER:
            wrbuf_printf(wr, "%s\n", n->u.leader);
            break;
        }
    }
    wrbuf_puts(wr, "\n");
    return 0;
}
599 
/** \brief closes the collection wrapper if one was opened
    \param mt handle
    \param wr WRBUF output
    \returns always 0

    The closing tag is written only when the opening tag has been
    emitted and the output format is MARCXML, Turbo MARC or MarcXchange.
*/
int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
{
    if (mt->enable_collection != collection_second)
        return 0;

    if (mt->output_format == YAZ_MARC_MARCXML
        || mt->output_format == YAZ_MARC_TURBOMARC
        || mt->output_format == YAZ_MARC_XCHANGE)
    {
        wrbuf_printf(wr, "</collection>\n");
    }
    return 0;
}
617 
/** \brief requests that XML records be wrapped in a collection element
    \param mt handle

    The opening tag is written together with the next XML record.
*/
void yaz_marc_enable_collection(yaz_marc_t mt)
{
    enum yaz_collection_state wanted = collection_first;

    mt->enable_collection = wanted;
}
622 
/** \brief writes the record in the handle's configured output format
    \param mt handle
    \param wr WRBUF output
    \returns result of the selected writer; -1 for an unknown format
*/
int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
{
    int rc = -1;

    switch (mt->output_format)
    {
    case YAZ_MARC_LINE:
        rc = yaz_marc_write_line(mt, wr);
        break;
    case YAZ_MARC_MARCXML:
        rc = yaz_marc_write_marcxml(mt, wr);
        break;
    case YAZ_MARC_TURBOMARC:
        rc = yaz_marc_write_turbomarc(mt, wr);
        break;
    case YAZ_MARC_XCHANGE:
        /* no format, type */
        rc = yaz_marc_write_marcxchange(mt, wr, 0, 0);
        break;
    case YAZ_MARC_ISO2709:
        rc = yaz_marc_write_iso2709(mt, wr);
        break;
    case YAZ_MARC_CHECK:
        rc = yaz_marc_write_check(mt, wr);
        break;
    case YAZ_MARC_JSON:
        rc = yaz_marc_write_json(mt, wr);
        break;
    }
    return rc;
}
644 
/* Element and attribute names used by the XML writers. Index 0 holds the
   long spelling, index 1 the one-letter spelling used for Turbo MARC. */
static const char *record_name[2]       = { "record",       "r" };
static const char *leader_name[2]       = { "leader",       "l" };
static const char *controlfield_name[2] = { "controlfield", "c" };
static const char *datafield_name[2]    = { "datafield",    "d" };
static const char *indicator_name[2]    = { "ind",          "i" };
static const char *subfield_name[2]     = { "subfield",     "s" };
651 
652 /** \brief common MARC XML/Xchange/turbomarc writer
653     \param mt handle
654     \param wr WRBUF output
655     \param ns XMLNS for the elements
656     \param format record format (e.g. "MARC21")
657     \param type record type (e.g. "Bibliographic")
658     \param turbo =1 for turbomarc
659     \retval 0 OK
660     \retval -1 failure
661 */
yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt,WRBUF wr,const char * ns,const char * format,const char * type,int turbo)662 static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
663                                         const char *ns,
664                                         const char *format,
665                                         const char *type,
666                                         int turbo)
667 {
668     struct yaz_marc_node *n;
669     int identifier_length;
670     const char *leader = 0;
671 
672     for (n = mt->nodes; n; n = n->next)
673         if (n->which == YAZ_MARC_LEADER)
674         {
675             leader = n->u.leader;
676             break;
677         }
678 
679     if (!leader)
680         return -1;
681     if (!atoi_n_check(leader+11, 1, &identifier_length))
682         return -1;
683 
684     if (mt->enable_collection != no_collection)
685     {
686         if (mt->enable_collection == collection_first)
687         {
688             wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
689             mt->enable_collection = collection_second;
690         }
691         wrbuf_printf(wr, "<%s", record_name[turbo]);
692     }
693     else
694     {
695         wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
696     }
697     if (format)
698         wrbuf_printf(wr, " format=\"%.80s\"", format);
699     if (type)
700         wrbuf_printf(wr, " type=\"%.80s\"", type);
701     wrbuf_printf(wr, ">\n");
702     for (n = mt->nodes; n; n = n->next)
703     {
704         struct yaz_marc_subfield *s;
705 
706         switch(n->which)
707         {
708         case YAZ_MARC_DATAFIELD:
709 
710             wrbuf_printf(wr, "  <%s", datafield_name[turbo]);
711             if (!turbo)
712             	wrbuf_printf(wr, " tag=\"");
713             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
714                                     strlen(n->u.datafield.tag));
715             if (!turbo)
716                 wrbuf_printf(wr, "\"");
717     	    if (n->u.datafield.indicator)
718     	    {
719     	    	int i;
720                 size_t off = 0;
721                 for (i = 0; n->u.datafield.indicator[off]; i++)
722     	    	{
723                     size_t ilen =
724                         cdata_one_character(mt, n->u.datafield.indicator + off);
725                     wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
726                     wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
727                                             n->u.datafield.indicator + off,
728                                             ilen);
729                     off += ilen;
730                     wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
731                 }
732             }
733             wrbuf_printf(wr, ">\n");
734             for (s = n->u.datafield.subfields; s; s = s->next)
735             {
736                 size_t using_code_len = get_subfield_len(mt, s->code_data,
737                                                          identifier_length);
738                 wrbuf_printf(wr, "    <%s", subfield_name[turbo]);
739                 if (!turbo)
740                 {
741                     wrbuf_printf(wr, " code=\"");
742                     wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
743                                             s->code_data, using_code_len);
744                     wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
745                 }
746                 else
747                 {
748                     element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
749                     wrbuf_puts(wr, ">");
750                 }
751                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
752                                         s->code_data + using_code_len,
753                                         strlen(s->code_data + using_code_len));
754                 marc_iconv_reset(mt, wr);
755                 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
756                 if (turbo)
757                     element_name_append_attribute_value(mt, wr, 0, s->code_data, using_code_len);
758                 wrbuf_puts(wr, ">\n");
759             }
760             wrbuf_printf(wr, "  </%s", datafield_name[turbo]);
761             /* TODO Not CDATA */
762             if (turbo)
763             	wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
764                                         strlen(n->u.datafield.tag));
765             wrbuf_printf(wr, ">\n");
766             break;
767         case YAZ_MARC_CONTROLFIELD:
768             wrbuf_printf(wr, "  <%s", controlfield_name[turbo]);
769             if (!turbo)
770             {
771             	wrbuf_printf(wr, " tag=\"");
772                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
773         				strlen(n->u.controlfield.tag));
774                 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
775             }
776             else
777             {
778                 /* TODO convert special */
779                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
780         				strlen(n->u.controlfield.tag));
781                 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
782             }
783             wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
784                                     n->u.controlfield.data,
785                                     strlen(n->u.controlfield.data));
786             marc_iconv_reset(mt, wr);
787             wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
788             /* TODO convert special */
789             if (turbo)
790                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
791     					strlen(n->u.controlfield.tag));
792             wrbuf_puts(wr, ">\n");
793             break;
794         case YAZ_MARC_COMMENT:
795             wrbuf_printf(wr, "<!-- ");
796             wrbuf_puts(wr, n->u.comment);
797             wrbuf_printf(wr, " -->\n");
798             break;
799         case YAZ_MARC_LEADER:
800             wrbuf_printf(wr, "  <%s>", leader_name[turbo]);
801             wrbuf_iconv_write_cdata(wr,
802                                     0 , /* no charset conversion for leader */
803                                     n->u.leader, strlen(n->u.leader));
804             wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
805         }
806     }
807     wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
808     return 0;
809 }
810 
/** \brief writes a record as XML, via libxml2 or directly to the WRBUF
    \param mt handle
    \param wr WRBUF output
    \param ns XMLNS for the elements
    \param format record format, or NULL
    \param type record type, or NULL
    \param turbo =1 for turbomarc
    \retval 0 OK
    \retval -1 failure

    If the handle is set to write using libxml2, a node tree is built,
    wrapped in a document and serialised into \a wr. That mode fails
    with -1 when YAZ is built without libxml2, when the document cannot
    be created or when serialisation yields no buffer.
*/
static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
                                     const char *ns,
                                     const char *format,
                                     const char *type,
                                     int turbo)
{
    if (mt->write_using_libxml2)
    {
#if YAZ_HAVE_XML2
        int ret;
        xmlNode *root_ptr = 0;

        if (!turbo)
            ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
        else
            ret = yaz_marc_write_xml_turbo_xml(mt, &root_ptr, ns, format, type);
        if (ret == 0)
        {
            xmlChar *buf_out = 0;
            int len_out = 0;
            xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");

            if (!doc)
            {
                /* the tree is not owned by any document yet */
                xmlFreeNode(root_ptr);
                return -1;
            }
            xmlDocSetRootElement(doc, root_ptr);
            xmlDocDumpMemory(doc, &buf_out, &len_out);

            if (buf_out)
            {
                wrbuf_write(wr, (const char *) buf_out, len_out);
                /* kept from the original code; NOTE(review): presumably
                   there to terminate the buffer - confirm */
                wrbuf_puts(wr, "");
                xmlFree(buf_out);
            }
            else
                ret = -1;   /* serialisation produced nothing */
            xmlFreeDoc(doc);  /* releases root_ptr as well */
        }
        return ret;
#else
        return -1;
#endif
    }
    else
        return yaz_marc_write_marcxml_wrbuf(mt, wr, ns, format, type, turbo);
}
849 
/** \brief writes the record as MARCXML (MARC21 slim namespace)
    \param mt handle
    \param wr WRBUF output
    \returns result of the common XML writer

    Unless a leader spec is already set, leader position 09 is set to
    'a' for UNICODE, see
    http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea
*/
int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
{
    static const char marcxml_ns[] = "http://www.loc.gov/MARC21/slim";

    if (mt->leader_spec == 0)
        yaz_marc_modify_leader(mt, 9, "a");

    return yaz_marc_write_marcxml_ns(mt, wr, marcxml_ns, 0, 0, 0);
}
860 
/** \brief writes the current record as TurboMARC XML
 *  \param mt handle
 *  \param wr buffer that receives the XML
 *  \returns the result of yaz_marc_write_marcxml_ns()
 */
int yaz_marc_write_turbomarc(yaz_marc_t mt, WRBUF wr)
{
    static const char turbomarc_ns[] = "http://www.indexdata.com/turbomarc";

    /* Unless a leader spec is configured, leader position 09 is set to
       'a' (UNICODE), see
       http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
    if (mt->leader_spec == 0)
        yaz_marc_modify_leader(mt, 9, "a");

    return yaz_marc_write_marcxml_ns(mt, wr, turbomarc_ns, 0, 0, 1);
}
870 
/** \brief writes the current record as MarcXchange XML
 *
 * The format and type arguments are forwarded to the XML writer. A NULL
 * value means that the corresponding attribute is not requested, so
 * callers passing 0 for both get the same output as before.
 *
 * \param mt handle
 * \param wr buffer that receives the XML
 * \param format value for the record "format" attribute, or NULL
 * \param type value for the record "type" attribute, or NULL
 * \returns the result of yaz_marc_write_marcxml_ns()
 */
int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
                               const char *format,
                               const char *type)
{
    return yaz_marc_write_marcxml_ns(mt, wr,
                                     "info:lc/xmlns/marcxchange-v1",
                                     format, type, 0);
}
879 
880 #if YAZ_HAVE_XML2
/** \brief adds the indicators of a data field as attributes on an XML node
 *
 * The indicator string is walked one character at a time, using the
 * length reported by cdata_one_character() as the step. Character number
 * k (counting from 1) becomes an attribute named indicator_name[turbo]
 * followed by k. Characters that do not fit the local value buffer are
 * skipped, but still counted.
 *
 * \param mt handle
 * \param n data field node
 * \param ptr XML element that receives the attributes
 * \param turbo index into indicator_name
 */
static void write_xml_indicator(yaz_marc_t mt, struct yaz_marc_node *n,
                                xmlNode *ptr, int turbo)
{
    char *cur = n->u.datafield.indicator;
    int number;

    if (!cur)
        return;
    for (number = 1; *cur; number++)
    {
        char value[10];
        size_t char_len = cdata_one_character(mt, cur);

        if (char_len < sizeof(value) - 1)
        {
            char attr_name[12];

            sprintf(attr_name, "%s%d", indicator_name[turbo], number);
            memcpy(value, cur, char_len);
            value[char_len] = '\0';
            xmlNewProp(ptr, BAD_CAST attr_name, BAD_CAST value);
        }
        cur += char_len;
    }
}
905 
/** \brief adds one data field to a turbo XML record
 *
 * The field element is named 'd' followed by (at most) the first three
 * characters of the tag. Each subfield becomes a child element whose name
 * starts with 's'; element_name_append_attribute_value() is asked to
 * extend that name from the subfield code, and when it reports non-zero a
 * "code" attribute carrying the converted code is added instead.
 *
 * \param mt handle
 * \param n data field node
 * \param record_ptr record element that receives the field
 * \param ns_record namespace used for the new elements
 * \param wr_cdata scratch buffer owned by the caller; rewound and reused
 * \param identifier_length passed to get_subfield_len()
 */
static void add_marc_datafield_turbo_xml(yaz_marc_t mt,
                                  struct yaz_marc_node *n,
                                  xmlNode *record_ptr,
                                  xmlNsPtr ns_record, WRBUF wr_cdata,
                                  int identifier_length)
{
    struct yaz_marc_subfield *sub = n->u.datafield.subfields;
    xmlNode *field_elem;
    WRBUF elem_name;
    char field_name[10];

    /* TODO consider if safe */
    field_name[0] = 'd';
    strncpy(field_name + 1, n->u.datafield.tag, 3);
    field_name[4] = '\0';

    elem_name = wrbuf_alloc();
    field_elem = xmlNewChild(record_ptr, ns_record, BAD_CAST field_name, 0);

    write_xml_indicator(mt, n, field_elem, 1);
    while (sub)
    {
        xmlNode *sub_elem;
        int need_code_attr;
        size_t code_len = get_subfield_len(mt, sub->code_data,
                                           identifier_length);

        /* subfield content, i.e. what follows the code */
        wrbuf_rewind(wr_cdata);
        wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, sub->code_data + code_len);
        marc_iconv_reset(mt, wr_cdata);

        /* element name: 's' plus whatever the helper appends */
        wrbuf_rewind(elem_name);
        wrbuf_puts(elem_name, "s");
        need_code_attr =
            element_name_append_attribute_value(mt, elem_name, 0,
                                                sub->code_data,
                                                code_len) != 0;
        sub_elem = xmlNewTextChild(field_elem, ns_record,
                                   BAD_CAST wrbuf_cstr(elem_name),
                                   BAD_CAST wrbuf_cstr(wr_cdata));
        if (need_code_attr)
        {
            /* the code did not go into the element name: emit it as
               the value of a code attribute */
            wrbuf_rewind(wr_cdata);
            wrbuf_iconv_write(wr_cdata, mt->iconv_cd, sub->code_data,
                              code_len);
            xmlNewProp(sub_elem, BAD_CAST "code",
                       BAD_CAST wrbuf_cstr(wr_cdata));
        }
        sub = sub->next;
    }
    wrbuf_destroy(elem_name);
}
950 
/** \brief builds a libxml2 tree for the current record in turbo layout
 *
 * The record element is named "r". Control fields become elements named
 * 'c' followed by (at most) the first three characters of the tag, data
 * fields are added by add_marc_datafield_turbo_xml(), comments become XML
 * comments and the leader becomes an "l" element.
 *
 * \param mt handle
 * \param root_ptr receives the new record element
 * \param ns namespace URI for the record
 * \param format value of the "format" attribute; omitted when NULL
 * \param type value of the "type" attribute; omitted when NULL
 * \returns 0 on success; -1 if the record has no leader or leader
 *          position 11 is rejected by atoi_n_check(), in which case
 *          *root_ptr is not assigned
 */
static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
                                        const char *ns,
                                        const char *format,
                                        const char *type)
{
    struct yaz_marc_node *node;
    xmlNode *record;
    xmlNsPtr record_ns;
    WRBUF cdata;
    int identifier_length;

    /* the first leader node decides */
    node = mt->nodes;
    while (node && node->which != YAZ_MARC_LEADER)
        node = node->next;
    if (!node || !node->u.leader)
        return -1;
    if (!atoi_n_check(node->u.leader + 11, 1, &identifier_length))
        return -1;

    cdata = wrbuf_alloc();

    record = xmlNewNode(0, BAD_CAST "r");
    *root_ptr = record;

    record_ns = xmlNewNs(record, BAD_CAST ns, 0);
    xmlSetNs(record, record_ns);

    if (format)
        xmlNewProp(record, BAD_CAST "format", BAD_CAST format);
    if (type)
        xmlNewProp(record, BAD_CAST "type", BAD_CAST type);

    for (node = mt->nodes; node; node = node->next)
    {
        if (node->which == YAZ_MARC_DATAFIELD)
        {
            add_marc_datafield_turbo_xml(mt, node, record, record_ns, cdata,
                                         identifier_length);
        }
        else if (node->which == YAZ_MARC_CONTROLFIELD)
        {
            char elem_name[10];

            wrbuf_rewind(cdata);
            wrbuf_iconv_puts(cdata, mt->iconv_cd, node->u.controlfield.data);
            marc_iconv_reset(mt, cdata);

            elem_name[0] = 'c';
            strncpy(elem_name + 1, node->u.controlfield.tag, 3);
            elem_name[4] = '\0';
            xmlNewTextChild(record, record_ns, BAD_CAST elem_name,
                            BAD_CAST wrbuf_cstr(cdata));
        }
        else if (node->which == YAZ_MARC_COMMENT)
        {
            xmlNode *comment = xmlNewComment(BAD_CAST node->u.comment);
            xmlAddChild(record, comment);
        }
        else if (node->which == YAZ_MARC_LEADER)
        {
            xmlNewTextChild(record, record_ns, BAD_CAST "l",
                            BAD_CAST node->u.leader);
        }
    }
    wrbuf_destroy(cdata);
    return 0;
}
1023 
1024 
/** \brief builds a libxml2 tree for the current record
 *
 * The record element is named "record" and gets children named
 * "leader", "controlfield" (with a tag attribute), "datafield" (with tag
 * and indicator attributes and "subfield" children carrying a code
 * attribute). Comment nodes become XML comments.
 *
 * \param mt handle
 * \param root_ptr receives the new record element
 * \param ns namespace URI for the record
 * \param format value of the "format" attribute; omitted when NULL
 * \param type value of the "type" attribute; omitted when NULL
 * \returns 0 on success; -1 if the record has no leader or leader
 *          position 11 is rejected by atoi_n_check(), in which case
 *          *root_ptr is not assigned
 */
int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
                       const char *ns,
                       const char *format,
                       const char *type)
{
    struct yaz_marc_node *node;
    xmlNode *record;
    xmlNsPtr record_ns;
    WRBUF text;
    int identifier_length;

    /* the first leader node decides */
    node = mt->nodes;
    while (node && node->which != YAZ_MARC_LEADER)
        node = node->next;
    if (!node || !node->u.leader)
        return -1;
    if (!atoi_n_check(node->u.leader + 11, 1, &identifier_length))
        return -1;

    text = wrbuf_alloc();

    record = xmlNewNode(0, BAD_CAST "record");
    *root_ptr = record;

    record_ns = xmlNewNs(record, BAD_CAST ns, 0);
    xmlSetNs(record, record_ns);

    if (format)
        xmlNewProp(record, BAD_CAST "format", BAD_CAST format);
    if (type)
        xmlNewProp(record, BAD_CAST "type", BAD_CAST type);

    for (node = mt->nodes; node; node = node->next)
    {
        struct yaz_marc_subfield *sub;
        xmlNode *child;

        switch (node->which)
        {
        case YAZ_MARC_LEADER:
            xmlNewTextChild(record, record_ns, BAD_CAST "leader",
                            BAD_CAST node->u.leader);
            break;
        case YAZ_MARC_COMMENT:
            child = xmlNewComment(BAD_CAST node->u.comment);
            xmlAddChild(record, child);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_rewind(text);
            wrbuf_iconv_puts(text, mt->iconv_cd, node->u.controlfield.data);
            marc_iconv_reset(mt, text);

            child = xmlNewTextChild(record, record_ns,
                                    BAD_CAST "controlfield",
                                    BAD_CAST wrbuf_cstr(text));
            xmlNewProp(child, BAD_CAST "tag",
                       BAD_CAST node->u.controlfield.tag);
            break;
        case YAZ_MARC_DATAFIELD:
            child = xmlNewChild(record, record_ns, BAD_CAST "datafield", 0);
            xmlNewProp(child, BAD_CAST "tag", BAD_CAST node->u.datafield.tag);
            write_xml_indicator(mt, node, child, 0);
            sub = node->u.datafield.subfields;
            while (sub)
            {
                xmlNode *sub_elem;
                size_t code_len = get_subfield_len(mt, sub->code_data,
                                                   identifier_length);

                /* element content: everything after the code */
                wrbuf_rewind(text);
                wrbuf_iconv_puts(text, mt->iconv_cd,
                                 sub->code_data + code_len);
                marc_iconv_reset(mt, text);
                sub_elem = xmlNewTextChild(child, record_ns,
                                           BAD_CAST "subfield",
                                           BAD_CAST wrbuf_cstr(text));

                /* code attribute: the leading code_len bytes */
                wrbuf_rewind(text);
                wrbuf_iconv_write(text, mt->iconv_cd,
                                  sub->code_data, code_len);
                xmlNewProp(sub_elem, BAD_CAST "code",
                           BAD_CAST wrbuf_cstr(text));
                sub = sub->next;
            }
            break;
        }
    }
    wrbuf_destroy(text);
    return 0;
}
1116 
1117 #endif
1118 
/** \brief writes the current record in ISO2709 format
 *
 * Works in two passes over the record nodes. The first pass converts
 * each field into a scratch buffer only to measure it and builds the
 * directory; the second pass appends leader, directory and the field
 * data to wr. If adding a field would take the record past 99999 bytes,
 * that field and all fields after it are left out (cap_node marks where
 * the second pass stops).
 *
 * A data field without indicator (NULL) is written with no indicator
 * bytes, in line with the XML and JSON writers which also accept a
 * missing indicator.
 *
 * \param mt handle
 * \param wr buffer that receives the record
 * \returns 0 on success; -1 if the record has no leader or one of the
 *          leader positions 10, 11, 20, 21, 22 is rejected by
 *          atoi_n_check(). Nothing is written to wr in that case.
 */
int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
{
    struct yaz_marc_node *n, *cap_node = 0;
    int indicator_length;
    int identifier_length;
    int length_data_entry;
    int length_starting;
    int length_implementation;
    int data_offset = 0;
    const char *leader = 0;
    WRBUF wr_dir, wr_head, wr_data_tmp;
    int base_address;

    /* no break: if there are several leader nodes the last one is used */
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
            leader = n->u.leader;

    if (!leader)
        return -1;
    /* only length_data_entry and length_starting are used below; the
       other positions are parsed for validation only */
    if (!atoi_n_check(leader+10, 1, &indicator_length))
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;
    if (!atoi_n_check(leader+20, 1, &length_data_entry))
        return -1;
    if (!atoi_n_check(leader+21, 1, &length_starting))
        return -1;
    if (!atoi_n_check(leader+22, 1, &length_implementation))
        return -1;

    /* pass 1: measure each field and build the directory */
    wr_data_tmp = wrbuf_alloc();
    wr_dir = wrbuf_alloc();
    for (n = mt->nodes; n; n = n->next)
    {
        int data_length = 0;
        const char *tag = 0;
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            tag = n->u.datafield.tag;
            /* indicator is written unconverted in pass 2, so its raw
               length is what counts; it may be absent */
            if (n->u.datafield.indicator)
                data_length += strlen(n->u.datafield.indicator);
            wrbuf_rewind(wr_data_tmp);
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                /* write dummy IDFS + content */
                wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
                wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
                marc_iconv_reset(mt, wr_data_tmp);
            }
            /* write dummy FS (makes MARC-8 to become ASCII) */
            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
            marc_iconv_reset(mt, wr_data_tmp);
            data_length += wrbuf_len(wr_data_tmp);
            break;
        case YAZ_MARC_CONTROLFIELD:
            tag = n->u.controlfield.tag;
            wrbuf_rewind(wr_data_tmp);
            wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
                             n->u.controlfield.data);
            marc_iconv_reset(mt, wr_data_tmp);
            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
            marc_iconv_reset(mt, wr_data_tmp);
            data_length += wrbuf_len(wr_data_tmp);
            break;
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_LEADER:
            break;
        }
        if (data_length && tag)
        {
            /* record length is a 5 digit number: stop before the field
               that would not fit (40 presumably is slack for leader and
               directory growth - TODO confirm) */
            if (wrbuf_len(wr_dir) + 40 + data_offset + data_length > 99999)
            {
                cap_node = n;
                break;
            }
            /* directory entry: tag, field length, starting position */
            wrbuf_printf(wr_dir, "%3.3s", tag);
            wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
            wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
            data_offset += data_length;
        }
    }
    /* mark end of directory */
    wrbuf_putc(wr_dir, ISO2709_FS);

    /* base address of data (comes after leader+directory) */
    base_address = 24 + wrbuf_len(wr_dir);

    wr_head = wrbuf_alloc();

    /* write record length */
    wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
    /* from "original" leader */
    wrbuf_write(wr_head, leader+5, 7);
    /* base address of data */
    wrbuf_printf(wr_head, "%05d", base_address);
    /* from "original" leader */
    wrbuf_write(wr_head, leader+17, 7);

    wrbuf_write(wr, wrbuf_buf(wr_head), 24);
    wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
    wrbuf_destroy(wr_head);
    wrbuf_destroy(wr_dir);
    wrbuf_destroy(wr_data_tmp);

    /* pass 2: field data, up to (not including) the capped field */
    for (n = mt->nodes; n != cap_node; n = n->next)
    {
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            if (n->u.datafield.indicator)
                wrbuf_puts(wr, n->u.datafield.indicator);
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                wrbuf_putc(wr, ISO2709_IDFS);
                wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
                marc_iconv_reset(mt, wr);
            }
            wrbuf_putc(wr, ISO2709_FS);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr);
            wrbuf_putc(wr, ISO2709_FS);
            break;
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_LEADER:
            break;
        }
    }
    wrbuf_printf(wr, "%c", ISO2709_RS);
    return 0;
}
1256 
/** \brief writes the current record as JSON
 *
 * Output is an object with a "leader" string and a "fields" array. A
 * control field becomes an object with the tag as key and the data as
 * value. A data field becomes an object with the tag as key and, as
 * value, an object holding a "subfields" array (one single-key object
 * per subfield, code mapping to content) followed by "ind1", "ind2", ...
 * for each indicator character.
 *
 * The leader is validated before anything is written, so a failing call
 * leaves w untouched.
 *
 * \param mt handle
 * \param w buffer that receives the JSON
 * \returns 0 on success; -1 if the record has no leader or leader
 *          position 11 is rejected by atoi_n_check()
 */
int yaz_marc_write_json(yaz_marc_t mt, WRBUF w)
{
    int identifier_length;
    struct yaz_marc_node *n;
    const char *leader = 0;
    int first = 1;

    /* no break: if there are several leader nodes the last one is used */
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
            leader = n->u.leader;

    if (!leader)
        return -1;

    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    wrbuf_puts(w, "{\n");
    wrbuf_puts(w, "\t\"leader\":\"");
    wrbuf_json_puts(w, leader);
    wrbuf_puts(w, "\",\n");
    wrbuf_puts(w, "\t\"fields\":\n\t[\n");

    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;
        const char *sep = ""; /* separator between subfields of a field */
        switch (n->which)
        {
        case YAZ_MARC_LEADER:
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_CONTROLFIELD:
            /* comma goes before every field but the first */
            if (first)
                first = 0;
            else
                wrbuf_puts(w, ",\n");
            wrbuf_puts(w, "\t\t{\n\t\t\t\"");
            wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.tag);
            wrbuf_puts(w, "\":\"");
            wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.data);
            wrbuf_puts(w, "\"\n\t\t}");
            break;
        case YAZ_MARC_DATAFIELD:
            if (first)
                first = 0;
            else
                wrbuf_puts(w, ",\n");

            wrbuf_puts(w, "\t\t{\n\t\t\t\"");
            wrbuf_json_puts(w, n->u.datafield.tag);
            wrbuf_puts(w, "\":\n\t\t\t{\n\t\t\t\t\"subfields\":\n\t\t\t\t[\n");
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                /* leading using_code_len bytes are the subfield code,
                   the rest is the content */
                size_t using_code_len = get_subfield_len(mt, s->code_data,
                                                         identifier_length);
                wrbuf_puts(w, sep);
                sep = ",\n";
                wrbuf_puts(w, "\t\t\t\t\t{\n\t\t\t\t\t\t\"");
                wrbuf_iconv_json_write(w, mt->iconv_cd,
                                       s->code_data, using_code_len);
                wrbuf_puts(w, "\":\"");
                wrbuf_iconv_json_puts(w, mt->iconv_cd,
                                      s->code_data + using_code_len);
                wrbuf_puts(w, "\"\n\t\t\t\t\t}");
            }
            wrbuf_puts(w, "\n\t\t\t\t]");
            if (n->u.datafield.indicator)
            {
                int i;
                size_t off = 0;
                /* one "indN" member per indicator character; the step
                   is the length reported by cdata_one_character() */
                for (i = 0; n->u.datafield.indicator[off]; i++)
                {
                    size_t ilen =
                        cdata_one_character(mt, n->u.datafield.indicator + off);
                    wrbuf_printf(w, ",\n\t\t\t\t\"ind%d\":\"", i + 1);
                    wrbuf_json_write(w, &n->u.datafield.indicator[off], ilen);
                    wrbuf_printf(w, "\"");
                    off += ilen;
                }
            }
            wrbuf_puts(w, "\n\t\t\t}\n");
            wrbuf_puts(w, "\n\t\t}");
            break;
        }
    }
    wrbuf_puts(w, "\n\t]\n");
    wrbuf_puts(w, "}\n");
    return 0;
}
1347 
/** \brief reads an ISO2709 record and writes it in the current output mode
 *  \param mt handle
 *  \param buf input buffer
 *  \param bsize passed to yaz_marc_read_iso2709() along with buf
 *  \param wr buffer that receives the converted record
 *  \returns the value from yaz_marc_read_iso2709() when it is <= 0 (no
 *           output is attempted then); -1 if yaz_marc_write_mode() returns
 *           non-zero; otherwise the positive value from the read
 */
int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
{
    int consumed = yaz_marc_read_iso2709(mt, buf, bsize);

    /* only a successful read is followed by a write; a failing write
       turns the result into an error */
    if (consumed > 0 && yaz_marc_write_mode(mt, wr) != 0)
        return -1;
    return consumed;
}
1358 
/** \brief decodes an ISO2709 record into the buffer owned by the handle
 *
 * The handle's own WRBUF (mt->m_wr) is rewound and used as output, so
 * *result points into memory that belongs to mt.
 *
 * \param mt handle
 * \param buf input buffer
 * \param bsize passed on to yaz_marc_decode_wrbuf() along with buf
 * \param result if non-NULL, receives the output as a C string
 * \param rsize if non-NULL, receives the output length
 * \returns the value from yaz_marc_decode_wrbuf(); result and rsize are
 *          assigned regardless of that value
 */
int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
                         const char **result, size_t *rsize)
{
    int status;

    wrbuf_rewind(mt->m_wr);
    status = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);

    if (result != 0)
        *result = wrbuf_cstr(mt->m_wr);
    if (rsize != 0)
        *rsize = wrbuf_len(mt->m_wr);

    return status;
}
1372 
/** \brief selects the output format of the handle
 *  \param mt handle (must not be NULL)
 *  \param xmlmode value stored as mt->output_format
 */
void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
{
    mt->output_format = xmlmode;
}
1377 
/** \brief sets the debug level of the handle
 *  \param mt handle; a NULL handle is accepted and ignored
 *  \param level value stored as mt->debug
 */
void yaz_marc_debug(yaz_marc_t mt, int level)
{
    if (!mt)
        return;
    mt->debug = level;
}
1383 
/** \brief sets the character set conversion used by the writers
 *  \param mt handle (must not be NULL)
 *  \param cd conversion handle stored as mt->iconv_cd; the pointer is
 *            stored as is, no copy is made
 */
void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
{
    mt->iconv_cd = cd;
}
1388 
yaz_marc_get_iconv(yaz_marc_t mt)1389 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1390 {
1391     return mt->iconv_cd;
1392 }
1393 
/** \brief overwrites part of the leader of the current record
 *
 * Only the first leader node is modified. The copy is clamped to the
 * current length of the leader string, so its terminating NUL is kept
 * and nothing is written beyond it; an offset at or past the end of the
 * leader makes the call a no-op.
 *
 * \param mt handle
 * \param off offset into the leader where writing starts
 * \param str characters to store at that offset (NULL is ignored)
 */
void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
{
    struct yaz_marc_node *n;

    if (!str)
        return;
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
        {
            char *leader = n->u.leader;
            size_t leader_len = leader ? strlen(leader) : 0;
            size_t len = strlen(str);

            if (off < leader_len)
            {
                /* never write over or past the terminator */
                if (len > leader_len - off)
                    len = leader_len - off;
                memcpy(leader + off, str, len);
            }
            break;
        }
}
1406 
/** \brief sets (or clears) the leader spec of the handle
 *
 * Any previous spec is released first. A new spec is checked by running
 * marc_exec_leader() against a 24 byte scratch leader and is only stored
 * (as a copy) if that succeeds.
 *
 * \param mt handle
 * \param leader_spec new spec, or NULL to just clear the current one
 * \returns 0 on success; -1 if the spec is rejected, in which case the
 *          handle is left without a spec
 */
int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
{
    char scratch[24];

    xfree(mt->leader_spec);
    mt->leader_spec = 0;

    if (!leader_spec)
        return 0;
    if (marc_exec_leader(leader_spec, scratch, sizeof(scratch)) != 0)
        return -1;

    mt->leader_spec = xstrdup(leader_spec);
    return 0;
}
1420 
/** \brief applies a leader spec to a leader buffer
 *
 * The spec is a comma separated list of pos=value items. A value is
 * either a quoted string ('...', at most 20 characters including quotes
 * and without commas), which is copied to the leader starting at pos, or
 * a number starting with a digit, which is stored as a single byte at
 * pos using the result of atoi().
 *
 * \param leader_spec spec to apply; NULL is accepted and does nothing
 * \param leader buffer that is modified
 * \param size number of bytes available in leader
 * \returns 0 on success; -1 on syntax error or if an item falls outside
 *          the buffer. Items before the offending one have already been
 *          applied at that point.
 */
static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
{
    const char *spec;

    if (!leader_spec)
        return 0;

    /* one item per iteration; the increment skips the separating comma */
    for (spec = leader_spec; ; spec++)
    {
        char value[21];
        int offset;
        int consumed = 0;
        int fields = sscanf(spec, "%d=%20[^,]%n", &offset, value, &consumed);

        if (fields < 2 || consumed < 3)
            return -1;
        if (offset < 0 || (size_t) offset >= size)
            return -1;

        if (value[0] == '\'')
        {
            /* quoted string: copy what is between the quotes */
            const char *closing = strchr(value + 1, '\'');
            size_t count;

            if (!closing)
                return -1;
            count = (size_t) (closing - (value + 1));
            if (count + offset > size)
                return -1;
            memcpy(leader + offset, value + 1, count);
        }
        else if (value[0] >= '0' && value[0] <= '9')
        {
            /* number: becomes the byte value itself */
            leader[offset] = (char) atoi(value);
        }
        else
            return -1;

        spec += consumed;
        if (*spec != ',')
            return 0;
    }
}
1463 
yaz_marc_decode_formatstr(const char * arg)1464 int yaz_marc_decode_formatstr(const char *arg)
1465 {
1466     int mode = -1;
1467     if (!strcmp(arg, "marc"))
1468         mode = YAZ_MARC_ISO2709;
1469     if (!strcmp(arg, "marcxml"))
1470         mode = YAZ_MARC_MARCXML;
1471     if (!strcmp(arg, "turbomarc"))
1472         mode = YAZ_MARC_TURBOMARC;
1473     if (!strcmp(arg, "marcxchange"))
1474         mode = YAZ_MARC_XCHANGE;
1475     if (!strcmp(arg, "line"))
1476         mode = YAZ_MARC_LINE;
1477     if (!strcmp(arg, "json"))
1478         mode = YAZ_MARC_JSON;
1479     return mode;
1480 }
1481 
/** \brief selects whether XML output is produced via libxml2
 *  \param mt handle (must not be NULL)
 *  \param enable stored as mt->write_using_libxml2; non-zero makes
 *                yaz_marc_write_marcxml_ns() take its libxml2 path
 */
void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
{
    mt->write_using_libxml2 = enable;
}
1486 
/** \brief checks for a record with 'a' at offset 9 while MARC8 is the
 *         given charset
 *
 * Offset 9 of the buffer is the leader position that the MARCXML writers
 * in this file set to 'a' for UNICODE.
 *
 * \param charset character set name; may be NULL
 * \param marc_buf record buffer; may be NULL
 * \param sz size of marc_buf; must be greater than 25 for a match
 * \returns 1 if charset matches "MARC8?" or "MARC8" according to
 *          yaz_matchstr() and marc_buf[9] is 'a'; 0 otherwise
 */
int yaz_marc_check_marc21_coding(const char *charset,
                                 const char *marc_buf, int sz)
{
    if (!charset)
        return 0;
    /* yaz_matchstr() returns 0 on a match */
    if (yaz_matchstr(charset, "MARC8?") && yaz_matchstr(charset, "MARC8"))
        return 0;
    if (!marc_buf || sz <= 25)
        return 0;
    return marc_buf[9] == 'a' ? 1 : 0;
}
1496 
1497 /*
1498  * Local variables:
1499  * c-basic-offset: 4
1500  * c-file-style: "Stroustrup"
1501  * indent-tabs-mode: nil
1502  * End:
1503  * vim: shiftwidth=4 tabstop=8 expandtab
1504  */
1505 
1506