1 /*
2 * bibcore.c
3 *
4 * Copyright (c) Chris Putnam 2005-2020
5 *
6 * Source code released under the GPL version 2
7 *
8 */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include "bibutils.h"
12
13 /* internal includes */
14 #include "reftypes.h"
15 #include "charsets.h"
16 #include "str_conv.h"
17 #include "is_ws.h"
18
/*
 * Pseudo-formats lying one past the public input/output ranges.  They are
 * rejected when passed in by a caller (see bibl_illegalinmode() and
 * bibl_illegaloutmode()) and are only assigned internally to mark the
 * side of a conversion that uses the in-memory representation.
 */
#define BIBL_INTERNALIN   ( BIBL_LASTIN + 1 )
#define BIBL_INTERNALOUT  ( BIBL_LASTOUT + 1 )

/* debug_set(): true when the verbosity level in p is above 1 */
#define debug_set( p )    ( ( p )->verbose > 1 )
/* verbose_set(): the raw verbosity level stored in p */
#define verbose_set( p )  ( ( p )->verbose )
25
26 // Georgi was: report_params( FILE *fp, const char *f, param *p )
27 // removed argument fp and hardcoded printing to stderr
28 static void
report_params(const char * f,param * p)29 report_params( const char *f, param *p )
30 {
31 // fflush( NULL );
32
33 REprintf( "-------------------params start for %s\n", f );
34 REprintf( "\tprogname='%s'\n\n", p->progname );
35
36 REprintf( "\treadformat=%d", p->readformat );
37 switch ( p->readformat ) {
38 case BIBL_INTERNALIN: REprintf( " (BIBL_INTERNALIN)\n" ); break;
39 case BIBL_MODSIN: REprintf( " (BIBL_MODSIN)\n" ); break;
40 case BIBL_BIBTEXIN: REprintf( " (BIBL_BIBTEXIN)\n" ); break;
41 case BIBL_RISIN: REprintf( " (BIBL_RISIN)\n" ); break;
42 case BIBL_ENDNOTEIN: REprintf( " (BIBL_ENDNOTEIN)\n" ); break;
43 case BIBL_COPACIN: REprintf( " (BIBL_COPACIN)\n" ); break;
44 case BIBL_ISIIN: REprintf( " (BIBL_ISIIN)\n" ); break;
45 case BIBL_MEDLINEIN: REprintf( " (BIBL_MEDLINEIN)\n" ); break;
46 case BIBL_ENDNOTEXMLIN: REprintf( " (BIBL_ENDNOTEXMLIN)\n" ); break;
47 case BIBL_BIBLATEXIN: REprintf( " (BIBL_BIBLATEXIN)\n" ); break;
48 case BIBL_EBIIN: REprintf( " (BIBL_EBIIN)\n" ); break;
49 case BIBL_WORDIN: REprintf( " (BIBL_WORDIN)\n" ); break;
50 case BIBL_NBIBIN: REprintf( " (BIBL_NBIBIN)\n" ); break;
51 default: REprintf( " (Illegal value)\n" ); break;
52 }
53 REprintf( "\tcharsetin=%d\n", p->charsetin );
54 REprintf( "\tcharsetin_src=%d", p->charsetin_src );
55 switch ( p->charsetin_src ) {
56 case BIBL_SRC_DEFAULT: REprintf( " (BIBL_SRC_DEFAULT)\n" ); break;
57 case BIBL_SRC_FILE: REprintf( " (BIBL_SRC_FILE)\n" ); break;
58 case BIBL_SRC_USER: REprintf( " (BIBL_SRC_USER)\n" ); break;
59 default: REprintf( " (Illegal value)\n" ); break;
60 }
61 REprintf( "\tutf8in=%d\n", p->utf8in );
62 REprintf( "\tlatexin=%d\n", p->latexin );
63 REprintf( "\txmlin=%d\n\n", p->xmlin );
64
65 REprintf( "\twriteformat=%d", p->writeformat );
66 switch ( p->writeformat ) {
67 case BIBL_INTERNALOUT: REprintf( " (BIBL_INTERNALOUT)\n" ); break;
68 case BIBL_ADSABSOUT: REprintf( " (BIBL_ADSABSOUT)\n" ); break;
69 case BIBL_BIBTEXOUT: REprintf( " (BIBL_BIBTEXOUT)\n" ); break;
70 case BIBL_ENDNOTEOUT: REprintf( " (BIBL_ENDNOTEOUT)\n" ); break;
71 case BIBL_ISIOUT: REprintf( " (BIBL_ISIOUT)\n" ); break;
72 case BIBL_MODSOUT: REprintf( " (BIBL_MODSOUT)\n" ); break;
73 case BIBL_NBIBOUT: REprintf( " (BIBL_NBIBOUT)\n" ); break;
74 case BIBL_RISOUT: REprintf( " (BIBL_RISOUT)\n" ); break;
75 case BIBL_WORD2007OUT: REprintf( " (BIBL_WORD2007OUT)\n" ); break;
76 default: REprintf( " (Illegal value)\n"); break;
77 }
78 REprintf( "\tcharsetout=%d\n", p->charsetout );
79 REprintf( "\tcharsetout_src=%d", p->charsetout_src );
80 switch ( p->charsetout_src ) {
81 case BIBL_SRC_DEFAULT: REprintf( " (BIBL_SRC_DEFAULT)\n" ); break;
82 case BIBL_SRC_FILE: REprintf( " (BIBL_SRC_FILE)\n" ); break;
83 case BIBL_SRC_USER: REprintf( " (BIBL_SRC_USER)\n" ); break;
84 default: REprintf( " (Illegal value)\n" ); break;
85 }
86 REprintf( "\tutf8out=%d\n", p->utf8out );
87 REprintf( "\tutf8bom=%d\n", p->utf8bom );
88 REprintf( "\tlatexout=%d\n", p->latexout );
89 REprintf( "\txmlout=%d\n", p->xmlout );
90 REprintf( "-------------------params end for %s\n", f );
91
92 // fflush( fp );
93 }
94
95 /* bibl_duplicateparams()
96 *
97 * Returns status of BIBL_OK or BIBL_ERR_MEMERR
98 */
99 static int
bibl_duplicateparams(param * np,param * op)100 bibl_duplicateparams( param *np, param *op )
101 {
102 int status;
103
104 slist_init( &(np->asis) );
105 status = slist_copy( &(np->asis), &(op->asis ) );
106 if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
107
108 slist_init( &(np->corps) );
109 status = slist_copy( &(np->corps), &(op->corps ) );
110 if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
111
112 if ( !op->progname ) np->progname = NULL;
113 else {
114 np->progname = strdup( op->progname );
115 if ( !np->progname ) return BIBL_ERR_MEMERR;
116 }
117
118 np->readformat = op->readformat;
119 np->charsetin = op->charsetin;
120 np->charsetin_src = op->charsetin_src;
121 np->utf8in = op->utf8in;
122 np->latexin = op->latexin;
123 np->xmlin = op->xmlin;
124
125 np->writeformat = op->writeformat;
126 np->charsetout = op->charsetout;
127 np->charsetout_src = op->charsetout_src;
128 np->utf8out = op->utf8out;
129 np->utf8bom = op->utf8bom;
130 np->latexout = op->latexout;
131 np->xmlout = op->xmlout;
132 np->nosplittitle = op->nosplittitle;
133
134 np->verbose = op->verbose;
135 np->format_opts = op->format_opts;
136 np->addcount = op->addcount;
137 np->output_raw = op->output_raw;
138 np->singlerefperfile = op->singlerefperfile;
139
140 np->readf = op->readf;
141 np->processf = op->processf;
142 np->cleanf = op->cleanf;
143 np->typef = op->typef;
144 np->convertf = op->convertf;
145 np->headerf = op->headerf;
146 np->footerf = op->footerf;
147 np->assemblef = op->assemblef;
148 np->writef = op->writef;
149
150 np->all = op->all;
151 np->nall = op->nall;
152
153 return BIBL_OK;
154 }
155
156 /* bibl_setreadparams()
157 *
158 * Returns status of BIBL_OK or BIBL_ERR_MEMERR
159 */
160 static int
bibl_setreadparams(param * np,param * op)161 bibl_setreadparams( param *np, param *op )
162 {
163 int status;
164 status = bibl_duplicateparams( np, op );
165 if ( status == BIBL_OK ) {
166 np->utf8out = 1;
167 np->charsetout = BIBL_CHARSET_UNICODE;
168 np->charsetout_src = BIBL_SRC_DEFAULT;
169 np->xmlout = BIBL_XMLOUT_FALSE;
170 np->latexout = 0;
171 np->writeformat = BIBL_INTERNALOUT;
172 }
173 return status;
174 }
175
176 /* bibl_setwriteparams()
177 *
178 * Returns status of BIBL_OK or BIBL_ERR_MEMERR
179 */
180 static int
bibl_setwriteparams(param * np,param * op)181 bibl_setwriteparams( param *np, param *op )
182 {
183 int status;
184 status = bibl_duplicateparams( np, op );
185 if ( status == BIBL_OK ) {
186 np->xmlin = 0;
187 np->latexin = 0;
188 np->utf8in = 1;
189 np->charsetin = BIBL_CHARSET_UNICODE;
190 np->charsetin_src = BIBL_SRC_DEFAULT;
191 np->readformat = BIBL_INTERNALIN;
192 }
193 return status;
194 }
195
196 void
bibl_freeparams(param * p)197 bibl_freeparams( param *p )
198 {
199 if ( p ) {
200 slist_free( &(p->asis) );
201 slist_free( &(p->corps) );
202 if ( p->progname ) free( p->progname );
203 }
204 }
205
206 int
bibl_readasis(param * p,char * f)207 bibl_readasis( param *p, char *f )
208 {
209 int status;
210
211 if ( !p ) return BIBL_ERR_BADINPUT;
212 if ( !f ) return BIBL_ERR_BADINPUT;
213
214 status = slist_fill( &(p->asis), f, 1 );
215
216 if ( status == SLIST_ERR_CANTOPEN ) return BIBL_ERR_CANTOPEN;
217 else if ( status == SLIST_ERR_MEMERR ) return BIBL_ERR_MEMERR;
218 return BIBL_OK;
219 }
220
221 int
bibl_readcorps(param * p,char * f)222 bibl_readcorps( param *p, char *f )
223 {
224 int status;
225
226 if ( !p ) return BIBL_ERR_BADINPUT;
227 if ( !f ) return BIBL_ERR_BADINPUT;
228
229 status = slist_fill( &(p->corps), f, 1 );
230
231 if ( status == SLIST_ERR_CANTOPEN ) return BIBL_ERR_CANTOPEN;
232 else if ( status == 0 ) return BIBL_ERR_MEMERR;
233 return BIBL_OK;
234 }
235
236 /* bibl_addtoasis()
237 *
238 * Returns BIBL_OK or BIBL_ERR_MEMERR
239 */
240 int
bibl_addtoasis(param * p,char * d)241 bibl_addtoasis( param *p, char *d )
242 {
243 int status;
244
245 if ( !p ) return BIBL_ERR_BADINPUT;
246 if ( !d ) return BIBL_ERR_BADINPUT;
247
248 status = slist_addc( &(p->asis), d );
249
250 return ( status==SLIST_OK )? BIBL_OK : BIBL_ERR_MEMERR;
251 }
252
253 /* bibl_addtocorps()
254 *
255 * Returns BIBL_OK or BIBL_ERR_MEMERR
256 */
257 int
bibl_addtocorps(param * p,char * d)258 bibl_addtocorps( param *p, char *d )
259 {
260 int status;
261
262 if ( !p ) return BIBL_ERR_BADINPUT;
263 if ( !d ) return BIBL_ERR_BADINPUT;
264
265 status = slist_addc( &(p->corps), d );
266
267 return ( status==SLIST_OK )? BIBL_OK : BIBL_ERR_MEMERR;
268 }
269
270 void
bibl_reporterr(int err)271 bibl_reporterr( int err )
272 {
273 REprintf( "Bibutils: " );
274 switch( err ) {
275 case BIBL_OK:
276 REprintf( "No error." ); break;
277 case BIBL_ERR_BADINPUT:
278 REprintf( "Bad input." ); break;
279 case BIBL_ERR_MEMERR:
280 REprintf( "Memory error." ); break;
281 case BIBL_ERR_CANTOPEN:
282 REprintf( "Can't open." ); break;
283 default:
284 REprintf( "Cannot identify error code %d.", err ); break;
285 }
286 REprintf( "\n" );
287 }
288
289 static int
bibl_illegalinmode(int mode)290 bibl_illegalinmode( int mode )
291 {
292 if ( mode < BIBL_FIRSTIN || mode > BIBL_LASTIN ) return 1;
293 else return 0;
294 }
295
296 static int
bibl_illegaloutmode(int mode)297 bibl_illegaloutmode( int mode )
298 {
299 if ( mode < BIBL_FIRSTOUT || mode > BIBL_LASTOUT ) return 1;
300 else return 0;
301 }
302
303 static void
bibl_verbose_reference(fields * f,char * filename,long refnum)304 bibl_verbose_reference( fields *f, char *filename, long refnum )
305 {
306 int i, n;
307 n = fields_num( f );
308 REprintf( "======== %s %ld : converted\n", filename, refnum );
309 for ( i=0; i<n; ++i ) {
310 // REprintf( "'%s'='%s' level=%d\n",
311 // (char*) fields_tag( f, i, FIELDS_CHRP_NOUSE ),
312 // (char*) fields_value( f, i, FIELDS_CHRP_NOUSE ),
313 // fields_level( f, i ) );
314 // Georgi
315 REprintf( "'%s'='%s' level=%d; ",
316 (char*) fields_tag( f, i, FIELDS_CHRP_NOUSE ),
317 (char*) fields_value( f, i, FIELDS_CHRP_NOUSE ),
318 fields_level( f, i ) );
319 REprintf( " \n" );
320 unsigned char *val = (unsigned char*) fields_value( f, i, FIELDS_CHRP_NOUSE );
321 int len = strlen((const char *)val);
322 for(int j = 0; j < len ; j++){
323 REprintf(" %x", val[j]);
324 }
325 REprintf( "\n" );
326
327 }
328 REprintf( "\n" );
329 }
330
331 static void
bibl_verbose(bibl * bin,const char * msg1,const char * msg2)332 bibl_verbose( bibl *bin, const char *msg1, const char *msg2 )
333 {
334 long i;
335 // fflush( stdout );
336 REprintf( "-------------------%s begin %s\n", msg1, msg2);
337 for ( i=0; i<bin->n; ++i )
338 bibl_verbose_reference( bin->ref[i], "", i+1 );
339 REprintf( "-------------------%s end %s\n", msg1, msg2);
340 // fflush( stderr );
341 }
342
343
344 /* extract_tag_value
345 *
346 * Extract the tag and the value for ALWAYS/DEFAULT
347 * entries like: "GENRE:BIBUTILS|Masters thesis"
348 *
349 * tag = "GENRE:BIBUTILS"
350 * value = "Masters thesis"
351 */
352 static int
extract_tag_value(str * tag,str * value,char * p)353 extract_tag_value( str *tag, str *value, char *p )
354 {
355 str_empty( tag );
356 while ( p && *p && *p!='|' ) {
357 str_addchar( tag, *p );
358 p++;
359 }
360 if ( str_memerr( tag ) ) return BIBL_ERR_MEMERR;
361
362 if ( p && *p=='|' ) p++;
363
364 str_empty( value );
365 while ( p && *p ) {
366 str_addchar( value, *p );
367 p++;
368 }
369 if ( str_memerr( tag ) ) return BIBL_ERR_MEMERR;
370
371 return BIBL_OK;
372 }
373
374 /* process_defaultadd()
375 *
376 * Add tag/value pairs that have "DEFAULT" processing
377 * unless a tag/value pair with the same tag has already
378 * been adding during reference processing.
379 */
380 static int
process_defaultadd(fields * f,int reftype,param * r)381 process_defaultadd( fields *f, int reftype, param *r )
382 {
383 int i, n, process, level, status, ret = BIBL_OK;
384 str tag, value;
385 char *p;
386
387 strs_init( &tag, &value, NULL );
388
389 for ( i=0; i<r->all[reftype].ntags; ++i ) {
390
391 process = ((r->all[reftype]).tags[i]).processingtype;
392 if ( process!=DEFAULT ) continue;
393
394 level = ((r->all[reftype]).tags[i]).level;
395 p = ((r->all[reftype]).tags[i]).newstr;
396
397 status = extract_tag_value( &tag, &value, p );
398 if ( status!=BIBL_OK ) {
399 ret = status;
400 goto out;
401 }
402
403 n = fields_find( f, tag.data, level );
404 if ( n==FIELDS_NOTFOUND ) {
405 status = fields_add( f, tag.data, value.data, level );
406 if ( status!=FIELDS_OK ) {
407 ret = BIBL_ERR_MEMERR;
408 goto out;
409 }
410 }
411
412 }
413 out:
414 strs_free( &tag, &value, NULL );
415
416 return ret;
417 }
418
419 /* process_alwaysadd()
420 *
421 * Add tag/value pair to reference from the ALWAYS
422 * processing type without exception (the difference from
423 * DEFAULT processing).
424 */
425 static int
process_alwaysadd(fields * f,int reftype,param * r)426 process_alwaysadd( fields *f, int reftype, param *r )
427 {
428 int i, process, level, status, ret = BIBL_OK;
429 str tag, value;
430 char *p;
431
432 strs_init( &tag, &value, NULL );
433
434 for ( i=0; i<r->all[reftype].ntags; ++i ) {
435
436 process = ((r->all[reftype]).tags[i]).processingtype;
437 if ( process!=ALWAYS ) continue;
438
439 level = ((r->all[reftype]).tags[i]).level;
440 p = ((r->all[reftype]).tags[i]).newstr;
441
442 status = extract_tag_value( &tag, &value, p );
443 if ( status!=BIBL_OK ) {
444 ret = status;
445 goto out;
446 }
447
448 status = fields_add( f, tag.data, value.data, level );
449 if ( status!=FIELDS_OK ) {
450 ret = BIBL_ERR_MEMERR;
451 goto out;
452 }
453 }
454
455 out:
456 strs_free( &tag, &value, NULL );
457
458 return ret;
459 }
460
461 static int
read_refs(FILE * fp,bibl * bin,char * filename,param * p)462 read_refs( FILE *fp, bibl *bin, char *filename, param *p )
463 {
464 int refnum = 0, bufpos = 0, ret=BIBL_OK, fcharset;/* = CHARSET_UNKNOWN;*/
465 str reference, line;
466 char buf[256]="";
467 fields *ref;
468
469 str_init( &reference );
470 str_init( &line );
471 while ( p->readf( fp, buf, sizeof(buf), &bufpos, &line, &reference, &fcharset ) ) {
472 if ( reference.len==0 ) continue;
473 ref = fields_new();
474 if ( !ref ) {
475 ret = BIBL_ERR_MEMERR;
476 bibl_free( bin );
477 goto out;
478 }
479 if ( p->processf( ref, reference.data, filename, refnum+1, p )){
480 ret = bibl_addref( bin, ref );
481
482 // fields_report_stderr(ref); // Georgi, for tests
483
484 if ( ret!=BIBL_OK ) {
485 bibl_free( bin );
486 fields_delete( ref );
487 goto out;
488 }
489 refnum += 1;
490 } else {
491 fields_delete( ref );
492 }
493 str_empty( &reference );
494 if ( fcharset!=CHARSET_UNKNOWN ) {
495 /* charset from file takes priority over default, but
496 * not user-specified */
497 if ( p->charsetin_src!=BIBL_SRC_USER ) {
498 p->charsetin_src = BIBL_SRC_FILE;
499 p->charsetin = fcharset;
500 if ( fcharset!=CHARSET_UNICODE ) p->utf8in = 0;
501 }
502 }
503 }
504 if ( p->charsetin==CHARSET_UNICODE ) p->utf8in = 1;
505 out:
506 str_free( &line );
507 str_free( &reference );
508 return ret;
509 }
510
/* bibl_notexify()
 *
 * Tells whether the field named tag must be kept out of LaTeX
 * conversion (URLs and the like).  The comparison ignores case.
 *
 * Returns 1 for a protected tag, 0 otherwise.
 */
static int
bibl_notexify( char *tag )
{
	static const char *const verbatim_tags[] = {
		"DOI", "URL", "REFNUM", "FILEATTACH", "FILE"
	};
	const size_t ntags = sizeof( verbatim_tags ) / sizeof( verbatim_tags[0] );
	size_t k;

	for ( k = 0; k < ntags; k++ ) {
		if ( strcasecmp( tag, verbatim_tags[k] ) == 0 ) return 1;
	}

	return 0;
}
521
522 /* bibl_fixcharsetdata()
523 *
524 * returns BIBL_OK or BIBL_ERR_MEMERR
525 */
526 static int
bibl_fixcharsetdata(fields * ref,param * p)527 bibl_fixcharsetdata( fields *ref, param *p )
528 {
529 str *data;
530 char *tag;
531 long i, n;
532 int ok;
533
534 n = fields_num( ref );
535
536 for ( i=0; i<n; ++i ) {
537
538 tag = fields_tag( ref, i, FIELDS_CHRP_NOUSE );
539 data = fields_value( ref, i, FIELDS_STRP_NOUSE );
540
541 // Georgi:
542 // REprintf("p->latexin: %d, p->charsetin: %d\n", p->latexin, p->charsetin );
543 // REprintf("p->latexout: %d, p->charsetout: %d\n", p->latexout, p->charsetout );
544
545 if ( bibl_notexify( tag ) ) {
546 ok = str_convert( data,
547 p->charsetin, 0, p->utf8in, p->xmlin,
548 p->charsetout, 0, p->utf8out, p->xmlout );
549 } else {
550 ok = str_convert( data,
551 p->charsetin, p->latexin, p->utf8in, p->xmlin,
552 p->charsetout, p->latexout, p->utf8out, p->xmlout );
553 }
554
555 if ( !ok ) return BIBL_ERR_MEMERR;
556 }
557
558 return BIBL_OK;
559 }
560
561 /* bibl_fixcharsets()
562 *
563 * returns BIBL_OK or BIBL_ERR_MEMERR
564 */
565 static int
bibl_fixcharsets(bibl * b,param * p)566 bibl_fixcharsets( bibl *b, param *p )
567 {
568 int status;
569 long i;
570
571 for ( i=0; i<b->n; ++i ) {
572 status = bibl_fixcharsetdata( b->ref[i], p );
573 if ( status!=BIBL_OK ) return status;
574 }
575
576 return BIBL_OK;
577 }
578
579 static int
bibl_addcount(bibl * b)580 bibl_addcount( bibl *b )
581 {
582 char buf[512];
583 fields *ref;
584 long i;
585 int n;
586
587 for ( i=0; i<b->n; ++i ) {
588
589 ref = b->ref[i];
590
591 n = fields_find( ref, "REFNUM", LEVEL_MAIN );
592 if ( n==FIELDS_NOTFOUND ) continue;
593
594 sprintf( buf, "_%ld", i+1 );
595 str_strcatc( fields_value( ref, n, FIELDS_STRP_NOUSE ), buf );
596 if ( str_memerr( fields_value( ref, n, FIELDS_STRP_NOUSE ) ) ) {
597 return BIBL_ERR_MEMERR;
598 }
599
600 }
601
602 return BIBL_OK;
603 }
604
605 static int
generate_citekey(fields * f,long nref)606 generate_citekey( fields *f, long nref )
607 {
608 int n1, n2, status, ret;
609 char *p, buf[100];
610 str citekey;
611
612 str_init( &citekey );
613
614 n1 = fields_find( f, "AUTHOR", LEVEL_MAIN );
615 if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:ASIS", LEVEL_MAIN );
616 if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:CORP", LEVEL_MAIN );
617 if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR", LEVEL_ANY );
618 if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:ASIS", LEVEL_ANY );
619 if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:CORP", LEVEL_ANY );
620
621 n2 = fields_find( f, "DATE:YEAR", LEVEL_MAIN );
622 if ( n2==FIELDS_NOTFOUND ) n2 = fields_find( f, "DATE:YEAR", LEVEL_ANY );
623 if ( n2==FIELDS_NOTFOUND ) n2 = fields_find( f, "PARTDATE:YEAR", LEVEL_MAIN );
624 if ( n2==FIELDS_NOTFOUND ) n2 = fields_find( f, "PARTDATE:YEAR", LEVEL_ANY );
625
626 if ( n1!=FIELDS_NOTFOUND && n2!=FIELDS_NOTFOUND ) {
627
628 p = fields_value( f, n1, FIELDS_CHRP_NOUSE );
629 while ( p && *p && *p!='|' ) {
630 if ( !is_ws( *p ) ) str_addchar( &citekey, *p );
631 p++;
632 }
633
634 p = fields_value( f, n2, FIELDS_CHRP_NOUSE );
635 while ( p && *p ) {
636 if ( !is_ws( *p ) ) str_addchar( &citekey, *p );
637 p++;
638 }
639
640 }
641
642 else {
643 sprintf( buf, "ref%ld", nref );
644 str_strcpyc( &citekey, buf );
645 }
646
647 if ( str_memerr( &citekey ) ) {
648 ret = -1;
649 goto out;
650 }
651
652 status = fields_add( f, "REFNUM", str_cstr( &citekey ), LEVEL_MAIN );
653 if ( status!=FIELDS_OK ) {
654 ret = -1;
655 goto out;
656 }
657
658 ret = fields_find( f, "REFNUM", LEVEL_MAIN );
659 out:
660 str_free( &citekey );
661 return ret;
662 }
663
664 static int
get_citekeys(bibl * bin,slist * citekeys)665 get_citekeys( bibl *bin, slist *citekeys )
666 {
667 int n, status;
668 fields *f;
669 long i;
670
671 for ( i=0; i<bin->n; ++i ) {
672 f = bin->ref[i];
673 n = fields_find( f, "REFNUM", LEVEL_ANY );
674 if ( n==FIELDS_NOTFOUND ) n = generate_citekey( f, i+1 );
675 if ( n!=FIELDS_NOTFOUND && fields_has_value( f, n ) ) {
676 status = slist_add( citekeys, fields_value( f, n, FIELDS_STRP_NOUSE ) );
677 if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
678 } else {
679 status = slist_addc( citekeys, "" );
680 if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
681 }
682 }
683
684 return BIBL_OK;
685 }
686
687 static int
identify_duplicates(bibl * b,slist * citekeys,int * dup)688 identify_duplicates( bibl *b, slist *citekeys, int *dup )
689 {
690 int i, j, ndup = 0;
691
692 for ( i=0; i<citekeys->n-1; ++i ) {
693 if ( dup[i]!=-1 ) continue;
694 for ( j=i+1; j<citekeys->n; ++j ) {
695 if ( !strcmp( slist_cstr( citekeys, i ),
696 slist_cstr( citekeys, j ) ) ) {
697 dup[i] = i;
698 dup[j] = i;
699 ndup++;
700 }
701 }
702 }
703
704 return ndup;
705 }
706
707 static int
build_new_citekey(int nsame,str * old_citekey,str * new_citekey)708 build_new_citekey( int nsame, str *old_citekey, str *new_citekey )
709 {
710 const char abc[]="abcdefghijklmnopqrstuvwxyz";
711
712 str_strcpy( new_citekey, old_citekey );
713
714 while ( nsame >= 26 ) {
715 str_addchar( new_citekey, 'a' );
716 nsame -= 26;
717 }
718
719 if ( nsame>=0 ) str_addchar( new_citekey, abc[nsame] );
720
721 return ( str_memerr( new_citekey ) ) ? BIBL_ERR_MEMERR : BIBL_OK;
722 }
723
724 static int
resolve_duplicates(bibl * b,slist * citekeys,int * dup)725 resolve_duplicates( bibl *b, slist *citekeys, int *dup )
726 {
727 int nsame, n, i, j, status = BIBL_OK;
728 str new_citekey, *ref_citekey;
729
730 str_init( &new_citekey );
731
732 for ( i=0; i<citekeys->n; ++i ) {
733
734 if ( dup[i]==-1 ) continue;
735
736 nsame = 0;
737
738 for ( j=i; j<citekeys->n; ++j ) {
739
740 if ( dup[j]!=i ) continue;
741
742 dup[j] = -1;
743
744 status = build_new_citekey( nsame, slist_str( citekeys, j ), &new_citekey );
745 if ( status!=BIBL_OK ) goto out;
746
747 n = fields_find( b->ref[j], "REFNUM", LEVEL_ANY );
748 if ( n==FIELDS_NOTFOUND ) continue;
749
750 ref_citekey = fields_value( b->ref[j], n, FIELDS_STRP_NOUSE );
751
752 str_strcpy( ref_citekey, &new_citekey );
753 if ( str_memerr( ref_citekey ) ) { status = BIBL_ERR_MEMERR; goto out; }
754
755 nsame++;
756 }
757 }
758 out:
759 str_free( &new_citekey );
760 return status;
761 }
762
763 static int
identify_and_resolve_duplicate_citekeys(bibl * b,slist * citekeys)764 identify_and_resolve_duplicate_citekeys( bibl *b, slist *citekeys )
765 {
766 int i, *dup, ndup, status=BIBL_OK;
767
768 dup = ( int * ) malloc( sizeof( int ) * citekeys->n );
769 if ( !dup ) return BIBL_ERR_MEMERR;
770 for ( i=0; i<citekeys->n; ++i ) dup[i] = -1;
771
772 ndup = identify_duplicates( b, citekeys, dup );
773
774 if ( ndup ) status = resolve_duplicates( b, citekeys, dup );
775
776 free( dup );
777 return status;
778 }
779
780 static int
uniqueify_citekeys(bibl * bin)781 uniqueify_citekeys( bibl *bin )
782 {
783 slist citekeys;
784 int status;
785
786 slist_init( &citekeys );
787
788 status = get_citekeys( bin, &citekeys );
789 if ( status!=BIBL_OK ) goto out;
790
791 status = identify_and_resolve_duplicate_citekeys( bin, &citekeys );
792 out:
793 slist_free( &citekeys );
794 return status;
795 }
796
797 static int
clean_refs(bibl * bin,param * p)798 clean_refs( bibl *bin, param *p )
799 {
800 if ( p->cleanf ) return p->cleanf( bin, p );
801 else return BIBL_OK;
802 }
803
804 static int
convert_refs(bibl * bin,char * fname,bibl * bout,param * p)805 convert_refs( bibl *bin, char *fname, bibl *bout, param *p )
806 {
807 int reftype = 0, status;
808 fields *rin, *rout;
809 long i;
810
811 // REprintf("convert_refs: in convert_refs!\n");
812
813 // REprintf("convert_refs: bib->n = %d\n", bin->n);
814 for ( i=0; i<bin->n; ++i ) {
815 // REprintf("convert_refs: i = %d\n", i);
816 rin = bin->ref[i];
817
818 // fields_report_stderr( rin ); // Testing only !!!!!!!!!!!!!!!1
819
820 rout = fields_new();
821 if ( !rout ) return BIBL_ERR_MEMERR;
822
823 if ( p->typef ) reftype = p->typef( rin, fname, i+1, p );
824
825 // REprintf("convert_refs: before p->convertf\n");
826 status = p->convertf( rin, rout, reftype, p );
827 // REprintf("convert_refs: after p->convertf\n");
828 if ( status!=BIBL_OK ) return status;
829
830 if ( p->all ) {
831 status = process_alwaysadd( rout, reftype, p );
832 if ( status!=BIBL_OK ) return status;
833 status = process_defaultadd( rout, reftype, p );
834 if ( status!=BIBL_OK ) return status;
835 }
836
837 status = bibl_addref( bout, rout );
838 if ( status!=BIBL_OK ) return status;
839 }
840
841 // REprintf("convert_refs: end of convert_refs!\n");
842 return BIBL_OK;
843 }
844
845 int
bibl_read(bibl * b,FILE * fp,char * filename,param * p)846 bibl_read( bibl *b, FILE *fp, char *filename, param *p )
847 {
848 int status = BIBL_OK;
849 param read_params;
850 bibl bin;
851 // REprintf("(bibl_read) in bibl_read!\n");
852
853 if ( !b ) return BIBL_ERR_BADINPUT;
854 if ( !fp ) return BIBL_ERR_BADINPUT;
855 if ( !p ) return BIBL_ERR_BADINPUT;
856
857 if ( bibl_illegalinmode( p->readformat ) ) {
858 if ( debug_set( p ) ) report_params( "bibl_read", p );
859 return BIBL_ERR_BADINPUT;
860 }
861
862 // REprintf("(bibl_read) after bibl_illegalinmode\n");
863
864 status = bibl_setreadparams( &read_params, p );
865
866 // REprintf("(bibl_read) after bibl_setreadparams\n");
867
868 if ( status!=BIBL_OK ) {
869 if ( debug_set( p ) ) report_params( "bibl_read", p );
870 return status;
871 }
872
873 if ( debug_set( &read_params ) ) {
874 report_params( "bibl_read", &read_params );
875 }
876
877 bibl_init( &bin );
878
879
880 // REprintf("(bibl_read) before read_refs\n");
881
882 status = read_refs( fp, &bin, filename, &read_params );
883 if ( status!=BIBL_OK ) {
884 if ( debug_set( &read_params ) ) report_params( "bibl_read", &read_params );
885 bibl_freeparams( &read_params );
886 return status;
887 }
888
889 // // Georgi: for testing
890 // REprintf("bibl_read: (after(read_refs)\n");
891 // for(long i = 0; i < bin.n; ++i) {
892 // fields_report_stderr( bin.ref[i] );
893 // }
894
895 if ( debug_set( &read_params ) ) {
896 bibl_verbose( &bin, "raw_input", "for bibl_read" );
897 }
898
899 if ( !read_params.output_raw || ( read_params.output_raw & BIBL_RAW_WITHCLEAN )) {
900 status = clean_refs( &bin, &read_params );
901 if ( status!=BIBL_OK ) goto out;
902 if ( debug_set( &read_params ) ) bibl_verbose( &bin, "post_clean_refs", "for bibl_read" );
903 }
904
905 // // Georgi: for testing
906 // REprintf("bibl_read: (after(clean_refs)\n");
907 // for(long i = 0; i < bin.n; ++i) {
908 // fields_report_stderr( bin.ref[i] );
909 // }
910
911 if ( ( !read_params.output_raw ) || ( read_params.output_raw & BIBL_RAW_WITHCHARCONVERT ) ) {
912 status = bibl_fixcharsets( &bin, &read_params );
913 if ( status!=BIBL_OK ) goto out;
914 if ( debug_set( &read_params ) ) bibl_verbose( &bin, "post_fixcharsets", "for bibl_read" );
915 }
916
917 // REprintf("bibl_read: (after(bibl_fixcharsets)\n");
918 // Georgi: for testing
919 // for(long i = 0; i < bin.n; ++i) {
920 // fields_report_stderr( bin.ref[i] );
921 // }
922
923 if ( !read_params.output_raw ) {
924 // REprintf("bibl_read: before convert_refs; read_params.output_raw is FALSE\n");
925 status = convert_refs( &bin, filename, b, &read_params );
926 if ( status!=BIBL_OK ) goto out;
927 if ( debug_set( &read_params ) ) bibl_verbose( b, "post_convert_refs", "for bibl_read" );
928 }
929
930 else {
931 // REprintf("bibl_read: before convert_refs; read_params.output_raw is TRUE\n");
932 status = bibl_copy( b, &bin );
933 if ( status!=BIBL_OK ) goto out;
934 if ( debug_set( &read_params ) ) bibl_verbose( b, "post_bibl_copy", "for bibl_read" );
935 }
936
937
938 // REprintf("bibl_read: before 'if' and uniquify_citekeys\n");
939
940 if ( ( !read_params.output_raw ) || ( read_params.output_raw & BIBL_RAW_WITHMAKEREFID ) ) {
941 // REprintf("bibl_read: before uniquify_citekeys\n");
942 status = uniqueify_citekeys( b );
943 if ( status!=BIBL_OK ) goto out;
944 if ( read_params.addcount ) {
945 status = bibl_addcount( b );
946 if ( status!=BIBL_OK ) goto out;
947 }
948 if ( debug_set( &read_params ) ) bibl_verbose( &bin, "post_uniqueify_citekeys", "for bibl_read" );
949 }
950
951 // // Georgi: for testing
952 // REprintf("\nbibl_read: at end of bibl_read\n");
953 // for(long i = 0; i < b->n; ++i) {
954 // fields_report_stderr( b->ref[i] );
955 // }
956
957 out:
958 bibl_free( &bin );
959 bibl_freeparams( &read_params );
960
961 return status;
962 }
963
964 static FILE *
singlerefname(fields * reffields,long nref,int mode)965 singlerefname( fields *reffields, long nref, int mode )
966 {
967 char outfile[2048];
968 char suffix[5] = "xml";
969 FILE *fp;
970 long count;
971 int found;
972 if ( mode==BIBL_ADSABSOUT ) strcpy( suffix, "ads" );
973 else if ( mode==BIBL_BIBTEXOUT ) strcpy( suffix, "bib" );
974 else if ( mode==BIBL_ENDNOTEOUT ) strcpy( suffix, "end" );
975 else if ( mode==BIBL_ISIOUT ) strcpy( suffix, "isi" );
976 else if ( mode==BIBL_MODSOUT ) strcpy( suffix, "xml" );
977 else if ( mode==BIBL_RISOUT ) strcpy( suffix, "ris" );
978 else if ( mode==BIBL_WORD2007OUT ) strcpy( suffix, "xml" );
979 found = fields_find( reffields, "REFNUM", LEVEL_MAIN );
980 /* find new filename based on reference */
981 if ( found!=-1 ) {
982 sprintf( outfile,"%s.%s",(char*)fields_value(reffields,found,FIELDS_CHRP_NOUSE), suffix );
983 } else sprintf( outfile,"%ld.%s",nref, suffix );
984 count = 0;
985 fp = fopen( outfile, "r" );
986 while ( fp ) {
987 fclose(fp);
988 count++;
989 if ( count==60000 ) return NULL;
990 if ( found!=-1 )
991 sprintf( outfile, "%s_%ld.%s", (char*)fields_value( reffields, found, FIELDS_CHRP_NOUSE ), count, suffix );
992 else sprintf( outfile,"%ld_%ld.%s", nref, count, suffix );
993 fp = fopen( outfile, "r" );
994 }
995 return fopen( outfile, "w" );
996 }
997
998 static int
bibl_writeeachfp(FILE * fp,bibl * b,param * p)999 bibl_writeeachfp( FILE *fp, bibl *b, param *p )
1000 {
1001 fields out, *use = &out;
1002 int status;
1003 long i;
1004
1005 fields_init( &out );
1006
1007 for ( i=0; i<b->n; ++i ) {
1008
1009 fp = singlerefname( b->ref[i], i, p->writeformat );
1010 if ( !fp ) return BIBL_ERR_CANTOPEN;
1011
1012 if ( p->headerf ) p->headerf( fp, p );
1013
1014 if ( p->assemblef ) {
1015 fields_free( &out );
1016 status = p->assemblef( b->ref[i], &out, p, i );
1017 if ( status!=BIBL_OK ) break;
1018 } else {
1019 use = b->ref[i];
1020 }
1021
1022 status = p->writef( use, fp, p, i );
1023
1024 if ( p->footerf ) p->footerf( fp );
1025 fclose( fp );
1026
1027 if ( status!=BIBL_OK ) return status;
1028 }
1029
1030 return BIBL_OK;
1031 }
1032
1033 static int
bibl_writefp(FILE * fp,bibl * b,param * p)1034 bibl_writefp( FILE *fp, bibl *b, param *p )
1035 {
1036 int status = BIBL_OK;
1037 fields out, *use = &out;
1038 long i;
1039
1040 fields_init( &out );
1041
1042 if ( debug_set( p ) && p->assemblef ) {
1043 REprintf( "-------------------assemblef start for bibl_write\n");
1044 }
1045
1046 if ( p->headerf ) p->headerf( fp, p );
1047 for ( i=0; i<b->n; ++i ) {
1048 if ( p->assemblef ) {
1049 fields_free( &out );
1050 // Georgi TODO: it seems that xml2nbib crashes here:
1051 status = p->assemblef( b->ref[i], &out, p, i );
1052 if ( status!=BIBL_OK ) break;
1053 if ( debug_set( p ) ) bibl_verbose_reference( &out, "", i+1 );
1054 } else {
1055 use = b->ref[i];
1056 }
1057
1058 status = p->writef( use, fp, p, i );
1059 if ( status!=BIBL_OK ) break;
1060
1061 }
1062
1063 if ( debug_set( p ) && p->assemblef ) {
1064 REprintf( "-------------------assemblef end for bibl_write\n");
1065 }
1066
1067 if ( p->footerf ) p->footerf( fp );
1068
1069 // Georgi: the above loop doesn't free the last reference
1070 // (fields_free is safe even if it is just initialised, which is the case here
1071 fields_free( &out );
1072
1073 return status;
1074 }
1075
1076 int
bibl_write(bibl * b,FILE * fp,param * p)1077 bibl_write( bibl *b, FILE *fp, param *p )
1078 {
1079 int status;
1080 param lp;
1081
1082 if ( !b ) return BIBL_ERR_BADINPUT;
1083 if ( !p ) return BIBL_ERR_BADINPUT;
1084 if ( bibl_illegaloutmode( p->writeformat ) ) return BIBL_ERR_BADINPUT;
1085 if ( !fp && !p->singlerefperfile ) return BIBL_ERR_BADINPUT;
1086
1087 status = bibl_setwriteparams( &lp, p );
1088 if ( status!=BIBL_OK ) return status;
1089
1090 if ( debug_set( p ) ) {
1091 report_params( "bibl_write", &lp );
1092 // fflush( fp ); // fflush( stdout );
1093 }
1094
1095 if ( debug_set( p ) ) bibl_verbose( b, "raw_input", "for bibl_write" );
1096
1097 status = bibl_fixcharsets( b, &lp );
1098
1099 if ( status!=BIBL_OK ) goto out;
1100
1101 if ( debug_set( p ) ) bibl_verbose( b, "post-fixcharsets", "for bibl_write" );
1102
1103 if ( p->singlerefperfile ) status = bibl_writeeachfp( fp, b, &lp );
1104 else status = bibl_writefp( fp, b, &lp );
1105
1106 out:
1107 bibl_freeparams( &lp );
1108 return status;
1109 }
1110
1111
1112
1113
1114
1115