1 /*
2 * xml2any.c
3 *
4 * Copyright (c) Georgi N. Boshnakov 2020
5 *
6 * The code in this file is based on xxx2yyy utilities by Chris Putnam 2003-2020
 * Responsibility for any bugs introduced in this adaptation lies with GNB.
8 *
9 * Program and source code released under the GPL version 2
10 *
11 */
12 #include <stdio.h>
13 #include <stdlib.h>
14
15 #include <R.h>
16
17 #include "bibutils.h"
18 #include "bibformats.h"
19 #include "args.h"
20 #include "bibprog.h"
21
22 // const char progname[] = "xml2bib";
23
/*
 * help_xml2bibtex()
 *
 * Print the usage text of the MODS XML -> BibTeX converter with REprintf(),
 * after calling args_tellversion( progname ).
 *
 *   progname - name substituted into the "usage:" line
 *
 * The long form of -at is shown as "--abbreviated-titles" because that is
 * the spelling process_args() in this file matches.
 * NOTE(review): process_args() also accepts -d/--drop-key for this
 * converter; it is not listed here because its effect is not documented
 * anywhere in this file.
 */
void
help_xml2bibtex( char *progname )
{
	args_tellversion( progname );
	REprintf( "Converts the MODS XML intermediate reference file "
	          "into Bibtex\n\n");

	REprintf("usage: %s xml_file > bibtex_file\n\n",progname);
	REprintf(" xml_file can be replaced with file list or omitted to use as a filter\n\n");

	REprintf(" -h, --help display this help\n");
	REprintf(" -v, --version display version\n");
	REprintf(" -at, --abbreviated-titles use abbreviated titles, if available\n");
	REprintf(" -fc, --finalcomma add final comma to bibtex output\n");
	REprintf(" -sd, --singledash use one dash '-', not two '--', in page ranges\n" );
	REprintf(" -b, --brackets use brackets, not quotation marks surrounding data\n");
	REprintf(" -w, --whitespace use beautifying whitespace to output\n");
	REprintf(" -sk, --strictkey use only alphanumeric characters for bibtex key\n");
	REprintf(" (overly strict, but useful for other programs)\n");
	REprintf(" -nl, --no-latex no latex encodings; put characters in directly\n");
	REprintf(" -nb, --no-bom do not write Byte Order Mark in UTF8 output\n");
	REprintf(" -U, --uppercase write bibtex tags/types in upper case\n" );
	REprintf(" -s, --single-refperfile one reference per output file\n");
	REprintf(" -i, --input-encoding interpret input file with requested character set\n" );
	/* wording aligned with the other help texts in this file */
	REprintf(" (use w/o argument for current list)\n");
	REprintf(" -o, --output-encoding write output file with requested character set\n" );
	REprintf(" (use w/o argument for current list)\n");
	REprintf(" --verbose for verbose\n" );
	REprintf(" --debug for debug output\n" );
	REprintf("\n");

	REprintf("Citation codes generated from <REFNUM> tag. See \n");
	REprintf("http://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n");
}
58
59
/*
 * help_xml2biblatex()
 *
 * Print the usage text of the MODS XML -> BibLaTeX converter with
 * REprintf(), after calling args_tellversion( progname ).
 *
 *   progname - name substituted into the "usage:" line
 *
 * The long form of -at is shown as "--abbreviated-titles" because that is
 * the spelling process_args() in this file matches.
 * NOTE(review): process_args() also accepts -d/--drop-key for this
 * converter; it is not listed here because its effect is not documented
 * anywhere in this file.
 */
void
help_xml2biblatex( char *progname )
{
	args_tellversion( progname );
	REprintf("Converts the MODS XML intermediate reference file "
	         "into BibLaTex\n\n");

	REprintf("usage: %s xml_file > biblatex_file\n\n",progname);
	REprintf(" xml_file can be replaced with file list or omitted to use as a filter\n\n");

	REprintf(" -h, --help display this help\n");
	REprintf(" -v, --version display version\n");
	REprintf(" -at, --abbreviated-titles use abbreviated titles, if available\n");
	REprintf(" -fc, --finalcomma add final comma to biblatex output\n");
	REprintf(" -sd, --singledash use one dash '-', not two '--', in page ranges\n" );
	REprintf(" -b, --brackets use brackets, not quotation marks surrounding data\n");
	REprintf(" -w, --whitespace use beautifying whitespace to output\n");
	REprintf(" -sk, --strictkey use only alphanumeric characters for bibtex key\n");
	REprintf(" (overly strict, but useful for other programs)\n");
	REprintf(" -nl, --no-latex no latex encodings; put characters in directly\n");
	REprintf(" -nb, --no-bom do not write Byte Order Mark in UTF8 output\n");
	REprintf(" -U, --uppercase write biblatex tags/types in upper case\n" );
	REprintf(" -s, --single-refperfile one reference per output file\n");
	REprintf(" -i, --input-encoding interpret input file with requested character set\n" );
	/* wording aligned with the other help texts in this file */
	REprintf(" (use w/o argument for current list)\n");
	REprintf(" -o, --output-encoding write output file with requested character set\n" );
	REprintf(" (use w/o argument for current list)\n");
	REprintf(" --verbose for verbose\n" );
	REprintf(" --debug for debug output\n" );
	REprintf("\n");

	REprintf("Citation codes generated from <REFNUM> tag. See \n");
	REprintf("http://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n");
}
94
95
/*
 * help_xml2end()
 *
 * Print the usage text of the XML -> (pre-)EndNote converter with
 * REprintf(), after calling args_tellversion( progname ).
 *
 *   progname - name substituted into the "usage:" line
 */
void
help_xml2end( char *progname )
{
	args_tellversion( progname );
	REprintf("Converts an XML intermediate reference file into a pre-EndNote format\n\n");

	REprintf("usage: %s xml_file > endnote_file\n\n", progname);
	REprintf(" xml_file can be replaced with file list or omitted to use as a filter\n\n");

	REprintf(" -h, --help display this help\n");
	REprintf(" -v, --version display version\n\n");
	REprintf(" -nb, --no-bom do not write Byte Order Mark in UTF8 output\n");
	REprintf(" -s, --single-refperfile one reference per output file\n");
	REprintf(" -i, --input-encoding interpret input file with requested character set (use\n" );
	/* wording aligned with the other help texts in this file */
	REprintf(" w/o argument for current list)\n");
	/* -o concerns the output side: it is written, not interpreted */
	REprintf(" -o, --output-encoding write output file with requested character set\n" );
	REprintf(" --verbose for verbose output\n");
	REprintf(" --debug for debug output\n");

	REprintf("http://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n");
}
117
/*
 * help_xml2isi()
 *
 * Print the usage text of the XML -> ISI converter with REprintf(), after
 * calling args_tellversion( progname ).
 *
 *   progname - name substituted into the "usage:" line
 */
void
help_xml2isi( char *progname )
{
	/* Everything that follows the "usage:" line, one entry per output chunk. */
	static const char *const tail[] = {
		" xml_file can be replaced with file list or omitted to use as a filter\n\n",
		" -h, --help display this help\n",
		" -v, --version display version\n\n",
		" -nb, --no-bom do not write Byte Order Mark in UTF8 output\n",
		" -s, --single-refperfile one reference per output file\n",
		" -i, --input-encoding interpret input file with requested character set\n",
		" (use w/o argument for current list)\n",
		" -o, --output-encoding write output file with requested character set\n",
		" (use w/o argument for current list)\n",
		" --verbose for verbose output\n",
		" --debug for debug output\n",
		"http://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n"
	};
	const size_t ntail = sizeof tail / sizeof tail[0];
	size_t k;

	args_tellversion( progname );

	REprintf( "%s", "Converts an XML intermediate reference file into ISI format\n\n" );
	REprintf( "usage: %s xml_file > isi_file\n\n", progname );

	for ( k = 0; k < ntail; ++k )
		REprintf( "%s", tail[k] );
}
140
141
/*
 * help_xml2nbib()
 *
 * Print the usage text of the XML -> NBIB converter with REprintf(), after
 * calling args_tellversion( progname ).
 *
 *   progname - name substituted into the "usage:" line
 */
void
help_xml2nbib( char *progname )
{
	args_tellversion( progname );

	/* banner, usage line and the note about the input argument */
	REprintf( "Converts an XML intermediate reference file into NBIB format\n\n"
	          "usage: %s xml_file > nbib_file\n\n"
	          " xml_file can be replaced with file list or omitted to use as a filter\n\n",
	          progname );

	/* option summary (no conversion specifiers in this text) */
	REprintf( " -h, --help display this help\n"
	          " -v, --version display version\n\n"
	          " -nb, --no-bom do not write Byte Order Mark in UTF8 output\n"
	          " -s, --single-refperfile one reference per output file\n"
	          " -i, --input-encoding interpret input file with requested character set\n"
	          " (use w/o argument for current list)\n"
	          " -o, --output-encoding write output file with requested character set\n"
	          " (use w/o argument for current list)\n"
	          " --verbose for verbose output\n"
	          " --debug for debug output\n" );

	REprintf( "http://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n" );
}
164
/*
 * help_xml2ris()
 *
 * Print the usage text of the XML -> RIS converter with REprintf(), after
 * calling args_tellversion( progname ).
 *
 *   progname - name substituted into the "usage:" line
 */
void
help_xml2ris( char *progname )
{
	/* Text printed after the "usage:" line; the list ends with NULL. */
	static const char *const rest[] = {
		" xml_file can be replaced with file list or omitted to use as a filter\n\n",
		" -h, --help display this help\n",
		" -v, --version display version\n\n",
		" -nb, --no-bom do not write Byte Order Mark in UTF8 output\n",
		" -s, --single-refperfile one reference per output file\n",
		" -i, --input-encoding interpret the input with specified character set\n",
		" (use w/o argument for current list)\n",
		" -o, --output-encoding write the output with specified character set\n",
		" (use w/o argument for current list)\n",
		" --verbose for verbose output\n",
		" --debug for debug output\n",
		"Citation codes (ID - ) generated from <REFNUM> tag. See \n",
		"http://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n",
		NULL
	};
	const char *const *line;

	args_tellversion( progname );

	REprintf( "%s", "Converts an XML intermediate reference file into RIS format\n\n" );
	REprintf( "usage: %s xml_file > ris_file\n\n", progname );

	for ( line = rest; *line != NULL; ++line )
		REprintf( "%s", *line );
}
187
/*
 * help_xml2wordbib()
 *
 * Print the usage text of the MODS XML -> Word 2007 bibliography converter
 * with REprintf(), after calling args_tellversion( progname ).
 *
 *   progname - name substituted into the "usage:" line
 */
void
help_xml2wordbib( char *progname )
{
	args_tellversion( progname );
	/* "bibliography" was misspelled in the first occurrence */
	REprintf("Converts a MODS XML bibliography into Word2007 format bibliography\n\n");
	REprintf("usage: %s xml_file > word_file\n\n", progname );
	REprintf(" xml_file can be replaced with file list or omitted to use as a filter\n\n" );
	REprintf(" -h, --help display this help\n" );
	REprintf(" -v, --version display version\n\n" );
	REprintf(" -nb, --no-bom do not write Byte Order Mark if writing UTF8\n" );
	REprintf(" -s, --single-refperfile one reference per output file\n");
	REprintf(" -i, --input-encoding interpret input file as using requested character set\n");
	REprintf(" (use w/o argument for current list)\n" );
	REprintf(" --verbose for verbose output\n" );
	REprintf(" --debug for debug output\n" );

	REprintf("http://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n" );
}
206
207
/*
 * help_xml2ads()
 *
 * Print the usage text of the XML -> ADS abstracts converter with
 * REprintf(), after calling args_tellversion( progname ).
 *
 *   progname - name substituted into the "usage:" line
 */
void
help_xml2ads( char *progname )
{
	args_tellversion( progname );
	/* fixed "a ADS aabstracts" */
	REprintf("Converts an XML intermediate reference file into an ADS abstracts format\n\n");

	REprintf("usage: %s xml_file > adsabs_file\n\n", progname );
	REprintf(" xml_file can be replaced with file list or omitted to use as a filter\n\n");
	REprintf(" -h, --help display this help\n");
	REprintf(" -v, --version display version\n");
	REprintf(" -nb, --no-bom do not write Byte Order Mark in UTF8 output\n");
	REprintf(" -s, --single-refperfile one reference per output file\n");
	REprintf(" --verbose for verbose output\n");
	REprintf(" --debug for debug output\n");

	REprintf("\nhttp://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n");
}
225
226
227
228 //void helpA( char *progname )
229 void (*helpAll[])(char *) = {
230 help_xml2bibtex,
231 help_xml2biblatex,
232
233 help_xml2end,
234 help_xml2isi,
235 help_xml2nbib,
236 help_xml2ris,
237 help_xml2wordbib,
238 help_xml2ads
239 };
240
241 // extern void process_args( int *argc, char *argv[], param *p );
242
243 // xml2bib
244 void
process_args(int * argc,char * argv[],param * p,const char * progname[])245 process_args( int *argc, char *argv[], param *p, const char *progname[] )
246 {
247 void (*help)( char *progname );
248
249 if(strcmp(*progname, "xml2bib") == 0 || strcmp(*progname, "xml2biblatex") == 0){
250 int i, j, subtract;
251 i = 1;
252 while ( i<*argc ) {
253 subtract = 0;
254 if ( args_match( argv[i], "-h", "--help" ) ) {
255 if(strcmp(*progname, "xml2bib") == 0)
256 help = helpAll[0];
257 else
258 help = helpAll[1];
259
260 help( p->progname );
261 // error("\n"); // exit( EXIT_SUCCESS );
262 subtract = 1;
263 } else if ( args_match( argv[i], "-v", "--version" ) ) {
264 args_tellversion( p->progname );
265 // error("\n"); // exit( EXIT_SUCCESS );
266 subtract = 1;
267 } else if ( args_match( argv[i], "-fc", "--finalcomma" ) ) {
268 p->format_opts |= BIBL_FORMAT_BIBOUT_FINALCOMMA;
269 subtract = 1;
270 } else if ( args_match( argv[i], "-s", "--single-refperfile" )){
271 p->singlerefperfile = 1;
272 subtract = 1;
273 } else if ( args_match( argv[i], "-sd", "--singledash" ) ) {
274 p->format_opts |= BIBL_FORMAT_BIBOUT_SINGLEDASH;
275 subtract = 1;
276 } else if ( args_match( argv[i], "-b", "--brackets" ) ) {
277 p->format_opts |= BIBL_FORMAT_BIBOUT_BRACKETS;
278 subtract = 1;
279 } else if ( args_match( argv[i], "-w", "--whitespace" ) ) {
280 p->format_opts |= BIBL_FORMAT_BIBOUT_WHITESPACE;
281 subtract = 1;
282 } else if ( args_match( argv[i], "-sk", "--strictkey" ) ) {
283 p->format_opts |= BIBL_FORMAT_BIBOUT_STRICTKEY;
284 subtract = 1;
285 } else if ( args_match( argv[i], "-U", "--uppercase" ) ) {
286 p->format_opts |= BIBL_FORMAT_BIBOUT_UPPERCASE;
287 subtract = 1;
288 } else if ( args_match( argv[i], "-at", "--abbreviated-titles" ) ) {
289 p->format_opts |= BIBL_FORMAT_BIBOUT_SHORTTITLE;
290 subtract = 1;
291 } else if ( args_match( argv[i], "-nl", "--no-latex" ) ) {
292 p->latexout = 0;
293 subtract = 1;
294 } else if ( args_match( argv[i], "-nb", "--no-bom" ) ) {
295 p->utf8bom = 0;
296 subtract = 1;
297 } else if ( args_match( argv[i], "-d", "--drop-key" ) ) {
298 p->format_opts |= BIBL_FORMAT_BIBOUT_DROPKEY;
299 subtract = 1;
300 } else if ( args_match( argv[i], "--verbose", "" ) ) {
301 p->verbose = 1;
302 subtract = 1;
303 } else if ( args_match( argv[i], "--debug", "" ) ) {
304 p->verbose = 3;
305 subtract = 1;
306 }
307 if ( subtract ) {
308 for ( j=i+subtract; j<*argc; ++j )
309 argv[j-subtract] = argv[j];
310 *argc -= subtract;
311 } else {
312 if ( argv[i][0]=='-' ) REprintf("(xml2any.c:312) Warning did not recognize potential command-line option %s\n", argv[i] );
313 i++;
314 }
315 }
316 }else{ // the remaining xml2xxx
317 // process_args for the rest
318 int i, j, subtract;
319 i = 1;
320 while ( i<*argc ) {
321 subtract = 0;
322 if ( args_match( argv[i], "-h", "--help" ) ) {
323 if(strcmp(*progname, "xml2ads") == 0)
324 help_xml2ads( p->progname );
325 else if(strcmp(*progname, "xml2end") == 0)
326 help_xml2end( p->progname );
327 else if(strcmp(*progname, "xml2isi") == 0)
328 help_xml2isi( p->progname );
329 else if(strcmp(*progname, "xml2nbib") == 0)
330 help_xml2nbib( p->progname );
331 else if(strcmp(*progname, "xml2ris") == 0)
332 help_xml2ris( p->progname );
333 else if(strcmp(*progname, "xml2wordbib") == 0)
334 help_xml2wordbib( p->progname );
335 else
336 error("currently help for %s is not available", p->progname);
337
338 // error("\n"); // exit( EXIT_SUCCESS );
339 subtract = 1;
340 } else if ( args_match( argv[i], "-v", "--version" ) ) {
341 args_tellversion( p->progname );
342 error("\n"); // exit( EXIT_SUCCESS );
343 } else if ( args_match( argv[i], "-s", "--single-refperfile")){
344 p->singlerefperfile = 1;
345 subtract = 1;
346 } else if ( args_match( argv[i], "-nb", "--no-bom" ) ) {
347 p->utf8bom = 0;
348 subtract = 1;
349 } else if ( args_match( argv[i], "--verbose", "" ) ) {
350 p->verbose = 1;
351 subtract = 1;
352 } else if ( args_match( argv[i], "--debug", "" ) ) {
353 p->verbose = 3;
354 subtract = 1;
355 } else if ( args_match( argv[i], "-nl", "--no-latex" ) ) {
356 // not relevant here, just ignore (TODO: maybe the calling R code should take care of this)
357 subtract = 1;
358 }
359 if ( subtract ) {
360 for ( j=i+subtract; j<*argc; ++j )
361 argv[j-subtract] = argv[j];
362 *argc -= subtract;
363 } else {
364 if ( argv[i][0]=='-' ) REprintf( "(xml2any.c:361) Warning: Did not recognize potential command-line argument %s\n", argv[i] );
365 i++;
366 }
367 }
368 }
369 }
370
371 // int
372 void
373 // xml2any_main( int *argc, char *argv[], char *outfile[], const char *progname_in[] )
xml2any_main(int * argc,char * argv[],char * outfile[],double * nref)374 xml2any_main( int *argc, char *argv[], char *outfile[], double *nref )
375 {
376 // Georgi
377 // REprintf("argc: %d\n", *argc);
378 // for( int ii = 0; ii < *argc; ii++)
379 // REprintf("argv[%d]: %s\n", ii, argv[ii]);
380
381 const char *progname = argv[0];
382
383 param p;
384 // int ihelp;
385 modsin_initparams( &p, progname );
386
387 if(strcmp(progname, "xml2bib") == 0){
388 bibtexout_initparams( &p, progname );
389 // ihelp = 0;
390 }else if(strcmp(progname, "xml2biblatex") == 0){
391 biblatexout_initparams( &p, progname );
392 // ihelp = 2;
393 }else if(strcmp(progname, "xml2copac") == 0){
394 bibl_freeparams( &p );
395 error("export to copac format not implemented");
396 // copacout_initparams( &p, progname );
397 // ihelp = 4;
398 }else if(strcmp(progname, "xml2ebi") == 0){
399 bibl_freeparams( &p );
400 error("export to EBI XML format not implemented");
401 // ebiout_initparams( &p, progname );
402 // ihelp = 6;
403 }else if(strcmp(progname, "xml2end") == 0){
404 endout_initparams( &p, progname );
405 // ihelp = 8;
406 }else if(strcmp(progname, "xml2endx") == 0){
407 bibl_freeparams( &p );
408 error("export to Endnote XML format not implemented");
409 // endxout_initparams( &p, progname );
410 // ihelp = 10;
411 }else if(strcmp(progname, "xml2isi") == 0){
412 isiout_initparams( &p, progname );
413 // ihelp = 12;
414 }else if(strcmp(progname, "xml2med") == 0){
415 bibl_freeparams( &p );
416 error("export to Medline XML format not implemented");
417 // medout_initparams( &p, progname );
418 // ihelp = 14;
419 }else if(strcmp(progname, "xml2nbib") == 0){
420 nbibout_initparams( &p, progname );
421 // ihelp = 16;
422 }else if(strcmp(progname, "xml2ris") == 0){
423 risout_initparams( &p, progname );
424 // ihelp = 18;
425 }else if(strcmp(progname, "xml2wordbib") == 0){
426 wordout_initparams( &p, progname );
427 // ihelp = 20;
428 }else if(strcmp(progname, "xml2ads") == 0){
429 adsout_initparams( &p, progname );
430 // ihelp = 22;
431 }else if(strcmp(progname, "xml2bibentry") == 0){
432 bibentryout_initparams( &p, progname );
433 }else {
434 bibl_freeparams( &p );
435 error("cannot deduce output format from name %s", progname);
436 }
437
438 process_charsets( argc, argv, &p );
439
440 process_args( argc, argv, &p, &progname ); // process_args( &argc, argv, &p );
441
442 //Georgi
443 //REprintf("OOOOh: p.latexout: %d, p.charsetout: %d\n", p.latexout, p.charsetout );
444
445 *nref = bibprog( argc[0], argv, &p, outfile ); // bibprog( argc, argv, &p );
446
447 // Georgi, no need to print, returned to caller
448 // if( p.progname ) REprintf( "%s: ", p.progname );
449 // REprintf( "processed %g references.\n", *nref );
450
451
452 bibl_freeparams( &p );
453 // return EXIT_SUCCESS;
454 }
455
456