1 /*
2  * xml2any.c
3  *
4  * Copyright (c) Georgi N. Boshnakov 2020
5  *
6  * The code in this file is based on xxx2yyy utilities by Chris Putnam 2003-2020
 * Responsibility for any bugs introduced in this adaptation lies with GNB.
8  *
9  * Program and source code released under the GPL version 2
10  *
11  */
12 #include <stdio.h>
13 #include <stdlib.h>
14 
15 #include <R.h>
16 
17 #include "bibutils.h"
18 #include "bibformats.h"
19 #include "args.h"
20 #include "bibprog.h"
21 
22 // const char progname[] = "xml2bib";
23 
/*
 * help_xml2bibtex()
 *
 * Print the version banner followed by the usage text for the
 * MODS XML -> BibTeX converter on R's error stream.
 *
 * progname: program name shown in the banner and in the usage line.
 *
 * The long option for -at is listed as "--abbreviated-titles", which is the
 * spelling process_args() in this file matches.
 */
void
help_xml2bibtex( char *progname )
{
	args_tellversion( progname );
	REprintf( "Converts the MODS XML intermediate reference file "
			"into Bibtex\n\n");

	REprintf("usage: %s xml_file > bibtex_file\n\n",progname);
	REprintf("  xml_file can be replaced with file list or omitted to use as a filter\n\n");

	REprintf("  -h,  --help               display this help\n");
	REprintf("  -v,  --version            display version\n");
	REprintf("  -at, --abbreviated-titles use abbreviated titles, if available\n");
	REprintf("  -fc, --finalcomma         add final comma to bibtex output\n");
	REprintf("  -sd, --singledash         use one dash '-', not two '--', in page ranges\n" );
	REprintf("  -b,  --brackets           use brackets, not quotation marks surrounding data\n");
	REprintf("  -w,  --whitespace         use beautifying whitespace to output\n");
	REprintf("  -sk, --strictkey          use only alphanumeric characters for bibtex key\n");
	REprintf("                            (overly strict, but useful for other programs)\n");
	REprintf("  -nl, --no-latex           no latex encodings; put characters in directly\n");
	REprintf("  -nb, --no-bom             do not write Byte Order Mark in UTF8 output\n");
	REprintf("  -U,  --uppercase          write bibtex tags/types in upper case\n" );
	REprintf("  -s,  --single-refperfile  one reference per output file\n");
	REprintf("  -i, --input-encoding      interpret input file with requested character set\n" );
	REprintf("                            (use w/o argument for current list)\n");
	REprintf("  -o, --output-encoding     write output file with requested character set\n" );
	REprintf("                            (use w/o argument for current list)\n");
	REprintf("  --verbose                 for verbose\n" );
	REprintf("  --debug                   for debug output\n" );
	REprintf("\n");

	REprintf("Citation codes generated from <REFNUM> tag.   See \n");
	REprintf("http://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n");
}
58 
59 
/*
 * help_xml2biblatex()
 *
 * Print the version banner followed by the usage text for the
 * MODS XML -> BibLaTeX converter on R's error stream.
 *
 * progname: program name shown in the banner and in the usage line.
 *
 * The long option for -at is listed as "--abbreviated-titles", which is the
 * spelling process_args() in this file matches.
 */
void
help_xml2biblatex( char *progname )
{
	args_tellversion( progname );
	REprintf("Converts the MODS XML intermediate reference file "
			"into BibLaTex\n\n");

	REprintf("usage: %s xml_file > biblatex_file\n\n",progname);
	REprintf("  xml_file can be replaced with file list or omitted to use as a filter\n\n");

	REprintf("  -h,  --help               display this help\n");
	REprintf("  -v,  --version            display version\n");
	REprintf("  -at, --abbreviated-titles use abbreviated titles, if available\n");
	REprintf("  -fc, --finalcomma         add final comma to biblatex output\n");
	REprintf("  -sd, --singledash         use one dash '-', not two '--', in page ranges\n" );
	REprintf("  -b,  --brackets           use brackets, not quotation marks surrounding data\n");
	REprintf("  -w,  --whitespace         use beautifying whitespace to output\n");
	REprintf("  -sk, --strictkey          use only alphanumeric characters for bibtex key\n");
	REprintf("                            (overly strict, but useful for other programs)\n");
	REprintf("  -nl, --no-latex           no latex encodings; put characters in directly\n");
	REprintf("  -nb, --no-bom             do not write Byte Order Mark in UTF8 output\n");
	REprintf("  -U,  --uppercase          write biblatex tags/types in upper case\n" );
	REprintf("  -s,  --single-refperfile  one reference per output file\n");
	REprintf("  -i, --input-encoding      interpret input file with requested character set\n" );
	REprintf("                            (use w/o argument for current list)\n");
	REprintf("  -o, --output-encoding     write output file with requested character set\n" );
	REprintf("                            (use w/o argument for current list)\n");
	REprintf("  --verbose                 for verbose\n" );
	REprintf("  --debug                   for debug output\n" );
	REprintf("\n");

	REprintf("Citation codes generated from <REFNUM> tag.   See \n");
	REprintf("http://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n");
}
94 
95 
/*
 * help_xml2end()
 *
 * Print the version banner followed by the usage text for the
 * XML -> pre-EndNote converter on R's error stream.
 *
 * progname: program name shown in the banner and in the usage line.
 */
void
help_xml2end( char *progname )
{
	args_tellversion( progname );
	REprintf("Converts an XML intermediate reference file into a pre-EndNote format\n\n");

	REprintf("usage: %s xml_file > endnote_file\n\n", progname);
	REprintf("  xml_file can be replaced with file list or omitted to use as a filter\n\n");

	REprintf("  -h, --help     display this help\n");
	REprintf("  -v, --version  display version\n\n");
	REprintf("  -nb, --no-bom   do not write Byte Order Mark in UTF8 output\n");
	REprintf("  -s, --single-refperfile one reference per output file\n");
	REprintf("  -i, --input-encoding interpret input file with requested character set (use\n" );
	REprintf("                       w/o argument for current list)\n");
	REprintf("  -o, --output-encoding write output file with requested character set\n" );
	REprintf("  --verbose      for verbose output\n");
	REprintf("  --debug        for debug output\n");

	REprintf("http://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n");
}
117 
/*
 * help_xml2isi()
 *
 * Print the version banner, a one-line description, the usage line and the
 * option summary for the XML -> ISI converter on R's error stream.
 *
 * progname: program name shown in the banner and in the usage line.
 */
void
help_xml2isi( char *progname )
{
	/* Fixed text that follows the usage line, emitted in order. */
	static const char *const trailer[] = {
		"  xml_file can be replaced with file list or omitted to use as a filter\n\n",
		"  -h, --help     display this help\n",
		"  -v, --version  display version\n\n",
		"  -nb, --no-bom  do not write Byte Order Mark in UTF8 output\n",
		"  -s, --single-refperfile one reference per output file\n",
		"  -i, --input-encoding  interpret input file with requested character set\n",
		"                       (use w/o argument for current list)\n",
		"  -o, --output-encoding write output file with requested character set\n",
		"                       (use w/o argument for current list)\n",
		"  --verbose      for verbose output\n",
		"  --debug        for debug output\n",
		"http://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n"
	};
	const size_t ntrailer = sizeof trailer / sizeof trailer[0];
	size_t k;

	args_tellversion( progname );

	REprintf( "%s", "Converts an XML intermediate reference file into ISI format\n\n" );
	REprintf( "usage: %s xml_file > isi_file\n\n", progname );

	for ( k = 0; k < ntrailer; ++k )
		REprintf( "%s", trailer[k] );
}
140 
141 
/*
 * help_xml2nbib()
 *
 * Print the version banner, a one-line description, the usage line and the
 * option summary for the XML -> NBIB converter on R's error stream.
 *
 * progname: program name shown in the banner and in the usage line.
 */
void
help_xml2nbib( char *progname )
{
	/* NULL-terminated list of the fixed lines printed after the usage line. */
	static const char *const body[] = {
		"  xml_file can be replaced with file list or omitted to use as a filter\n\n",
		"  -h, --help     display this help\n",
		"  -v, --version  display version\n\n",
		"  -nb, --no-bom  do not write Byte Order Mark in UTF8 output\n",
		"  -s, --single-refperfile one reference per output file\n",
		"  -i, --input-encoding  interpret input file with requested character set\n",
		"                       (use w/o argument for current list)\n",
		"  -o, --output-encoding write output file with requested character set\n",
		"                       (use w/o argument for current list)\n",
		"  --verbose      for verbose output\n",
		"  --debug        for debug output\n",
		"http://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n",
		NULL
	};
	const char *const *line;

	args_tellversion( progname );

	REprintf( "%s", "Converts an XML intermediate reference file into NBIB format\n\n" );
	REprintf( "usage: %s xml_file > nbib_file\n\n", progname );

	for ( line = body; *line != NULL; ++line )
		REprintf( "%s", *line );
}
164 
/*
 * help_xml2ris()
 *
 * Print the version banner, a one-line description, the usage line and the
 * option summary for the XML -> RIS converter on R's error stream.
 *
 * progname: program name shown in the banner and in the usage line.
 */
void
help_xml2ris( char *progname )
{
	args_tellversion( progname );

	/* Description and usage line; progname is the only substituted value. */
	REprintf(
		"Converts an XML intermediate reference file into RIS format\n\n"
		"usage: %s xml_file > ris_file\n\n",
		progname );

	/* The remaining text is constant, so it is passed as data, not as a format. */
	REprintf( "%s",
		"  xml_file can be replaced with file list or omitted to use as a filter\n\n"
		"  -h, --help     display this help\n"
		"  -v, --version  display version\n\n"
		"  -nb, --no-bom  do not write Byte Order Mark in UTF8 output\n"
		"  -s, --single-refperfile one reference per output file\n"
		"  -i, --input-encoding  interpret the input with specified character set\n"
		"                        (use w/o argument for current list)\n"
		"  -o, --output-encoding write the output with specified character set\n"
		"                        (use w/o argument for current list)\n"
		"  --verbose      for verbose output\n"
		"  --debug        for debug output\n"
		"Citation codes (ID  - ) generated from <REFNUM> tag.   See \n"
		"http://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n" );
}
187 
/*
 * help_xml2wordbib()
 *
 * Print the version banner followed by the usage text for the
 * MODS XML -> Word 2007 bibliography converter on R's error stream.
 *
 * progname: program name shown in the banner and in the usage line.
 */
void
help_xml2wordbib( char *progname )
{
	args_tellversion( progname );
	REprintf("Converts a MODS XML bibliography into Word2007 format bibliography\n\n");
	REprintf("usage: %s xml_file > word_file\n\n", progname );
	REprintf("  xml_file can be replaced with file list or omitted to use as a filter\n\n" );
	REprintf("  -h, --help              display this help\n" );
	REprintf("  -v, --version           display version\n\n" );
	REprintf("  -nb, --no-bom           do not write Byte Order Mark if writing UTF8\n" );
	REprintf("  -s, --single-refperfile one reference per output file\n");
	REprintf("  -i, --input-encoding    interpret input file as using requested character set\n");
	REprintf("                          (use w/o argument for current list)\n" );
	REprintf("  --verbose               for verbose output\n" );
	REprintf("  --debug                 for debug output\n" );

	REprintf("http://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n" );
}
206 
207 
/*
 * help_xml2ads()
 *
 * Print the version banner followed by the usage text for the
 * XML -> ADS abstracts converter on R's error stream.
 *
 * progname: program name shown in the banner and in the usage line.
 */
void
help_xml2ads( char *progname )
{
	args_tellversion( progname );
	REprintf("Converts an XML intermediate reference file into an ADS abstracts format\n\n");

	REprintf("usage: %s xml_file > adsabs_file\n\n", progname );
	REprintf("  xml_file can be replaced with file list or omitted to use as a filter\n\n");
	REprintf("  -h, --help               display this help\n");
	REprintf("  -v, --version            display version\n");
	REprintf("  -nb, --no-bom            do not write Byte Order Mark in UTF8 output\n");
	REprintf("  -s, --single-refperfile  one reference per output file\n");
	REprintf("  --verbose                for verbose output\n");
	REprintf("  --debug                  for debug output\n");

	REprintf("\nhttp://sourceforge.net/p/bibutils/home/Bibutils for more details\n\n");
}
225 
226 
227 
228 //void helpA( char *progname )
229 void (*helpAll[])(char *) = {
230 			     help_xml2bibtex,
231 			     help_xml2biblatex,
232 
233 			     help_xml2end,
234 			     help_xml2isi,
235 			     help_xml2nbib,
236 			     help_xml2ris,
237 			     help_xml2wordbib,
238 			     help_xml2ads
239 };
240 
241 // extern void process_args( int *argc, char *argv[], param *p );
242 
243 // xml2bib
244 void
process_args(int * argc,char * argv[],param * p,const char * progname[])245 process_args( int *argc, char *argv[], param *p, const char *progname[] )
246 {
247   void (*help)( char *progname );
248 
249 	if(strcmp(*progname, "xml2bib") == 0  || strcmp(*progname, "xml2biblatex") == 0){
250 	  int i, j, subtract;
251 	  i = 1;
252 	  while ( i<*argc ) {
253 	  	subtract = 0;
254 	  	if ( args_match( argv[i], "-h", "--help" ) ) {
255 		  	if(strcmp(*progname, "xml2bib") == 0)
256 		  	  help = helpAll[0];
257 		  	else
258 		  	  help = helpAll[1];
259 
260 		        help( p->progname );
261 	  		// error("\n"); // exit( EXIT_SUCCESS );
262 	  		subtract = 1;
263 	  	} else if ( args_match( argv[i], "-v", "--version" ) ) {
264 	  		args_tellversion( p->progname );
265 	  		// error("\n"); // exit( EXIT_SUCCESS );
266 	  		subtract = 1;
267 	  	} else if ( args_match( argv[i], "-fc", "--finalcomma" ) ) {
268 	  		p->format_opts |= BIBL_FORMAT_BIBOUT_FINALCOMMA;
269 	  		subtract = 1;
270 	  	} else if ( args_match( argv[i], "-s", "--single-refperfile" )){
271 	  		p->singlerefperfile = 1;
272 	  		subtract = 1;
273 	  	} else if ( args_match( argv[i], "-sd", "--singledash" ) ) {
274 	  		p->format_opts |= BIBL_FORMAT_BIBOUT_SINGLEDASH;
275 	  		subtract = 1;
276 	  	} else if ( args_match( argv[i], "-b", "--brackets" ) ) {
277 	  		p->format_opts |= BIBL_FORMAT_BIBOUT_BRACKETS;
278 	  		subtract = 1;
279 	  	} else if ( args_match( argv[i], "-w", "--whitespace" ) ) {
280 	  		p->format_opts |= BIBL_FORMAT_BIBOUT_WHITESPACE;
281 	  		subtract = 1;
282 	  	} else if ( args_match( argv[i], "-sk", "--strictkey" ) ) {
283 	  		p->format_opts |= BIBL_FORMAT_BIBOUT_STRICTKEY;
284 	  		subtract = 1;
285 	  	} else if ( args_match( argv[i], "-U", "--uppercase" ) ) {
286 	  		p->format_opts |= BIBL_FORMAT_BIBOUT_UPPERCASE;
287 	  		subtract = 1;
288 	  	} else if ( args_match( argv[i], "-at", "--abbreviated-titles" ) ) {
289 	  		p->format_opts |= BIBL_FORMAT_BIBOUT_SHORTTITLE;
290 	  		subtract = 1;
291 	  	} else if ( args_match( argv[i], "-nl", "--no-latex" ) ) {
292 	  		p->latexout = 0;
293 	  		subtract = 1;
294 	  	} else if ( args_match( argv[i], "-nb", "--no-bom" ) ) {
295 	  		p->utf8bom = 0;
296 	  		subtract = 1;
297 	  	} else if ( args_match( argv[i], "-d", "--drop-key" ) ) {
298 	  		p->format_opts |= BIBL_FORMAT_BIBOUT_DROPKEY;
299 	  		subtract = 1;
300 	  	} else if ( args_match( argv[i], "--verbose", "" ) ) {
301 	  		p->verbose = 1;
302 	  		subtract = 1;
303 	  	} else if ( args_match( argv[i], "--debug", "" ) ) {
304 	  		p->verbose = 3;
305 	  		subtract = 1;
306 	  	}
307 	  	if ( subtract ) {
308 	  		for ( j=i+subtract; j<*argc; ++j )
309 	  			argv[j-subtract] = argv[j];
310 	  		*argc -= subtract;
311 	  	} else {
312 	  		if ( argv[i][0]=='-' ) REprintf("(xml2any.c:312) Warning did not recognize potential command-line option %s\n", argv[i] );
313 	  		i++;
314 	  	}
315 	  }
316 	}else{ // the remaining xml2xxx
317 	  // process_args for the rest
318 	  int i, j, subtract;
319 	  i = 1;
320 	  while ( i<*argc ) {
321 	  	subtract = 0;
322 	  	if ( args_match( argv[i], "-h", "--help" ) ) {
323 		  	if(strcmp(*progname, "xml2ads") == 0)
324 			  help_xml2ads( p->progname );
325 		  	else if(strcmp(*progname, "xml2end") == 0)
326 			  help_xml2end( p->progname );
327 		  	else if(strcmp(*progname, "xml2isi") == 0)
328 			  help_xml2isi( p->progname );
329 		  	else if(strcmp(*progname, "xml2nbib") == 0)
330 			  help_xml2nbib( p->progname );
331 		  	else if(strcmp(*progname, "xml2ris") == 0)
332 			  help_xml2ris( p->progname );
333 		  	else if(strcmp(*progname, "xml2wordbib") == 0)
334 			  help_xml2wordbib( p->progname );
335 		  	else
336 		  	  error("currently help for %s is not available", p->progname);
337 
338 	  		// error("\n"); // exit( EXIT_SUCCESS );
339 	  		subtract = 1;
340 	  	} else if ( args_match( argv[i], "-v", "--version" ) ) {
341 	  		args_tellversion( p->progname );
342 	  		error("\n"); // exit( EXIT_SUCCESS );
343 	  	} else if ( args_match( argv[i], "-s", "--single-refperfile")){
344 	  		p->singlerefperfile = 1;
345 	  		subtract = 1;
346 	  	} else if ( args_match( argv[i], "-nb", "--no-bom" ) ) {
347 	  		p->utf8bom = 0;
348 	  		subtract = 1;
349 	  	} else if ( args_match( argv[i], "--verbose", "" ) ) {
350 	  		p->verbose = 1;
351 	  		subtract = 1;
352 	  	} else if ( args_match( argv[i], "--debug", "" ) ) {
353 	  		p->verbose = 3;
354 	  		subtract = 1;
355 	  	} else if ( args_match( argv[i], "-nl", "--no-latex" ) ) {
356 		        // not relevant here, just ignore (TODO: maybe the calling R code should take care of this)
357 	  		subtract = 1;
358 	  	}
359 	  	if ( subtract ) {
360 	  		for ( j=i+subtract; j<*argc; ++j )
361 	  			argv[j-subtract] = argv[j];
362 	  		*argc -= subtract;
363 	  	} else {
364 	  		if ( argv[i][0]=='-' ) REprintf( "(xml2any.c:361) Warning: Did not recognize potential command-line argument %s\n", argv[i] );
365 	  		i++;
366 	  	}
367 	  }
368 	}
369 }
370 
371 // int
372 void
373 // xml2any_main( int *argc, char *argv[], char *outfile[], const char *progname_in[] )
xml2any_main(int * argc,char * argv[],char * outfile[],double * nref)374 xml2any_main( int *argc, char *argv[], char *outfile[], double *nref )
375 {
376   // Georgi
377   // REprintf("argc: %d\n", *argc);
378   // for( int ii = 0; ii < *argc; ii++)
379   //   REprintf("argv[%d]: %s\n", ii, argv[ii]);
380 
381   const char *progname = argv[0];
382 
383       	param p;
384 	// int ihelp;
385 	modsin_initparams( &p, progname );
386 
387 	if(strcmp(progname, "xml2bib") == 0){
388 	  bibtexout_initparams( &p, progname );
389 	  // ihelp = 0;
390 	}else if(strcmp(progname,  "xml2biblatex") == 0){
391 	  biblatexout_initparams( &p, progname );
392 	  // ihelp = 2;
393 	}else if(strcmp(progname, "xml2copac") == 0){
394 	  bibl_freeparams( &p );
395 	  error("export to copac format not implemented");
396 	  // copacout_initparams( &p, progname );
397 	  // ihelp = 4;
398 	}else if(strcmp(progname, "xml2ebi") == 0){
399 	  bibl_freeparams( &p );
400 	  error("export to EBI XML format not implemented");
401 	  // ebiout_initparams( &p, progname );
402 	  // ihelp = 6;
403 	}else if(strcmp(progname, "xml2end") == 0){
404 	  endout_initparams( &p, progname );
405 	  // ihelp = 8;
406 	}else if(strcmp(progname, "xml2endx") == 0){
407 	  bibl_freeparams( &p );
408 	  error("export to Endnote XML format not implemented");
409 	  // endxout_initparams( &p, progname );
410 	  // ihelp = 10;
411 	}else if(strcmp(progname, "xml2isi") == 0){
412 	  isiout_initparams( &p, progname );
413 	  // ihelp = 12;
414 	}else if(strcmp(progname, "xml2med") == 0){
415 	  bibl_freeparams( &p );
416 	  error("export to Medline XML format not implemented");
417 	  // medout_initparams( &p, progname );
418 	  // ihelp = 14;
419 	}else if(strcmp(progname, "xml2nbib") == 0){
420 	  nbibout_initparams( &p, progname );
421 	  // ihelp = 16;
422 	}else if(strcmp(progname, "xml2ris") == 0){
423 	  risout_initparams( &p, progname );
424 	  // ihelp = 18;
425 	}else if(strcmp(progname, "xml2wordbib") == 0){
426 	  wordout_initparams( &p, progname );
427 	  // ihelp = 20;
428 	}else if(strcmp(progname, "xml2ads") == 0){
429 	  adsout_initparams( &p, progname );
430 	  // ihelp = 22;
431 	}else if(strcmp(progname,  "xml2bibentry") == 0){
432 	  bibentryout_initparams( &p, progname );
433 	}else {
434 	  bibl_freeparams( &p );
435 	  error("cannot deduce output format from name %s", progname);
436 	}
437 
438 	process_charsets( argc, argv, &p );
439 
440 	process_args( argc, argv, &p, &progname );          // process_args( &argc, argv, &p );
441 
442 	//Georgi
443 	//REprintf("OOOOh: p.latexout: %d, p.charsetout: %d\n", p.latexout, p.charsetout );
444 
445 	*nref = bibprog( argc[0], argv, &p, outfile );   // bibprog( argc, argv, &p );
446 
447 	// Georgi, no need to print, returned to caller
448 	// if( p.progname ) REprintf( "%s: ", p.progname );
449 	// REprintf( "processed %g references.\n", *nref );
450 
451 
452 	bibl_freeparams( &p );
453 	// return EXIT_SUCCESS;
454 }
455 
456