1 /*
2  * bibcore.c
3  *
4  * Copyright (c) Chris Putnam 2005-2020
5  *
6  * Source code released under the GPL version 2
7  *
8  */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include "bibutils.h"
12 
13 /* internal includes */
14 #include "reftypes.h"
15 #include "charsets.h"
16 #include "str_conv.h"
17 #include "is_ws.h"
18 
/* Pseudo-formats one past the last public input/output format.
 * They are illegal for callers to pass in (bibl_illegalinmode() and
 * bibl_illegaloutmode() reject anything above BIBL_LASTIN/BIBL_LASTOUT),
 * but are used internally for consistency. */
#define BIBL_INTERNALIN   (BIBL_LASTIN+1)
#define BIBL_INTERNALOUT  (BIBL_LASTOUT+1)

/* debug_set(p) is true when p->verbose is greater than 1;
 * verbose_set(p) is true for any non-zero p->verbose. */
#define debug_set( p ) ( (p)->verbose > 1 )
#define verbose_set( p ) ( (p)->verbose )
25 
26 // Georgi was: report_params( FILE *fp, const char *f, param *p )
27 //     removed argument fp and hardcoded printing to stderr
28 static void
report_params(const char * f,param * p)29 report_params( const char *f, param *p )
30 {
31         // fflush( NULL );
32 
33 	REprintf( "-------------------params start for %s\n", f );
34 	REprintf( "\tprogname='%s'\n\n", p->progname );
35 
36 	REprintf( "\treadformat=%d", p->readformat );
37 	switch ( p->readformat ) {
38 		case BIBL_INTERNALIN:   REprintf( " (BIBL_INTERNALIN)\n" );   break;
39 		case BIBL_MODSIN:       REprintf( " (BIBL_MODSIN)\n" );       break;
40 		case BIBL_BIBTEXIN:     REprintf( " (BIBL_BIBTEXIN)\n" );     break;
41 		case BIBL_RISIN:        REprintf( " (BIBL_RISIN)\n" );        break;
42 		case BIBL_ENDNOTEIN:    REprintf( " (BIBL_ENDNOTEIN)\n" );    break;
43 		case BIBL_COPACIN:      REprintf( " (BIBL_COPACIN)\n" );      break;
44 		case BIBL_ISIIN:        REprintf( " (BIBL_ISIIN)\n" );        break;
45 		case BIBL_MEDLINEIN:    REprintf( " (BIBL_MEDLINEIN)\n" );    break;
46 		case BIBL_ENDNOTEXMLIN: REprintf( " (BIBL_ENDNOTEXMLIN)\n" ); break;
47 		case BIBL_BIBLATEXIN:   REprintf( " (BIBL_BIBLATEXIN)\n" );   break;
48 		case BIBL_EBIIN:        REprintf( " (BIBL_EBIIN)\n" );        break;
49 		case BIBL_WORDIN:       REprintf( " (BIBL_WORDIN)\n" );       break;
50 		case BIBL_NBIBIN:       REprintf( " (BIBL_NBIBIN)\n" );       break;
51 		default:                REprintf( " (Illegal value)\n" );     break;
52 	}
53 	REprintf( "\tcharsetin=%d\n", p->charsetin );
54 	REprintf( "\tcharsetin_src=%d", p->charsetin_src );
55 	switch ( p->charsetin_src ) {
56 		case BIBL_SRC_DEFAULT:  REprintf( " (BIBL_SRC_DEFAULT)\n" ); break;
57 		case BIBL_SRC_FILE:     REprintf( " (BIBL_SRC_FILE)\n" );    break;
58 		case BIBL_SRC_USER:     REprintf( " (BIBL_SRC_USER)\n" );    break;
59 		default:                REprintf( " (Illegal value)\n" );    break;
60 	}
61 	REprintf( "\tutf8in=%d\n", p->utf8in );
62 	REprintf( "\tlatexin=%d\n", p->latexin );
63 	REprintf( "\txmlin=%d\n\n", p->xmlin );
64 
65 	REprintf( "\twriteformat=%d", p->writeformat );
66 	switch ( p->writeformat ) {
67 		case BIBL_INTERNALOUT:  REprintf( " (BIBL_INTERNALOUT)\n" );  break;
68 		case BIBL_ADSABSOUT:    REprintf( " (BIBL_ADSABSOUT)\n" );    break;
69 		case BIBL_BIBTEXOUT:    REprintf( " (BIBL_BIBTEXOUT)\n" );    break;
70 		case BIBL_ENDNOTEOUT:   REprintf( " (BIBL_ENDNOTEOUT)\n" );   break;
71 		case BIBL_ISIOUT:       REprintf( " (BIBL_ISIOUT)\n" );       break;
72 		case BIBL_MODSOUT:      REprintf( " (BIBL_MODSOUT)\n" );      break;
73 		case BIBL_NBIBOUT:      REprintf( " (BIBL_NBIBOUT)\n" );      break;
74 		case BIBL_RISOUT:       REprintf( " (BIBL_RISOUT)\n" );       break;
75 		case BIBL_WORD2007OUT:  REprintf( " (BIBL_WORD2007OUT)\n" );  break;
76 		default:                REprintf( " (Illegal value)\n");      break;
77 	}
78 	REprintf( "\tcharsetout=%d\n", p->charsetout );
79 	REprintf( "\tcharsetout_src=%d", p->charsetout_src );
80 	switch ( p->charsetout_src ) {
81 		case BIBL_SRC_DEFAULT:  REprintf( " (BIBL_SRC_DEFAULT)\n" ); break;
82 		case BIBL_SRC_FILE:     REprintf( " (BIBL_SRC_FILE)\n" );    break;
83 		case BIBL_SRC_USER:     REprintf( " (BIBL_SRC_USER)\n" );    break;
84 		default:                REprintf( " (Illegal value)\n" );    break;
85 	}
86 	REprintf( "\tutf8out=%d\n", p->utf8out );
87 	REprintf( "\tutf8bom=%d\n", p->utf8bom );
88 	REprintf( "\tlatexout=%d\n", p->latexout );
89 	REprintf( "\txmlout=%d\n", p->xmlout );
90 	REprintf( "-------------------params end for %s\n", f );
91 
92 	// fflush( fp );
93 }
94 
95 /* bibl_duplicateparams()
96  *
97  * Returns status of BIBL_OK or BIBL_ERR_MEMERR
98  */
99 static int
bibl_duplicateparams(param * np,param * op)100 bibl_duplicateparams( param *np, param *op )
101 {
102 	int status;
103 
104 	slist_init( &(np->asis) );
105 	status = slist_copy( &(np->asis), &(op->asis ) );
106 	if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
107 
108 	slist_init( &(np->corps) );
109 	status = slist_copy( &(np->corps), &(op->corps ) );
110 	if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
111 
112 	if ( !op->progname ) np->progname = NULL;
113 	else {
114 		np->progname = strdup( op->progname );
115 		if ( !np->progname ) return BIBL_ERR_MEMERR;
116 	}
117 
118 	np->readformat    = op->readformat;
119 	np->charsetin     = op->charsetin;
120 	np->charsetin_src = op->charsetin_src;
121 	np->utf8in        = op->utf8in;
122 	np->latexin       = op->latexin;
123 	np->xmlin         = op->xmlin;
124 
125 	np->writeformat    = op->writeformat;
126 	np->charsetout     = op->charsetout;
127 	np->charsetout_src = op->charsetout_src;
128 	np->utf8out        = op->utf8out;
129 	np->utf8bom        = op->utf8bom;
130 	np->latexout       = op->latexout;
131 	np->xmlout         = op->xmlout;
132 	np->nosplittitle   = op->nosplittitle;
133 
134 	np->verbose          = op->verbose;
135 	np->format_opts      = op->format_opts;
136 	np->addcount         = op->addcount;
137 	np->output_raw       = op->output_raw;
138 	np->singlerefperfile = op->singlerefperfile;
139 
140 	np->readf     = op->readf;
141 	np->processf  = op->processf;
142 	np->cleanf    = op->cleanf;
143 	np->typef     = op->typef;
144 	np->convertf  = op->convertf;
145 	np->headerf   = op->headerf;
146 	np->footerf   = op->footerf;
147 	np->assemblef = op->assemblef;
148 	np->writef    = op->writef;
149 
150 	np->all       = op->all;
151 	np->nall      = op->nall;
152 
153 	return BIBL_OK;
154 }
155 
156 /* bibl_setreadparams()
157  *
158  * Returns status of BIBL_OK or BIBL_ERR_MEMERR
159  */
160 static int
bibl_setreadparams(param * np,param * op)161 bibl_setreadparams( param *np, param *op )
162 {
163 	int status;
164 	status = bibl_duplicateparams( np, op );
165 	if ( status == BIBL_OK ) {
166 		np->utf8out        = 1;
167 		np->charsetout     = BIBL_CHARSET_UNICODE;
168 		np->charsetout_src = BIBL_SRC_DEFAULT;
169 		np->xmlout         = BIBL_XMLOUT_FALSE;
170 		np->latexout       = 0;
171 		np->writeformat    = BIBL_INTERNALOUT;
172 	}
173 	return status;
174 }
175 
176 /* bibl_setwriteparams()
177  *
178  * Returns status of BIBL_OK or BIBL_ERR_MEMERR
179  */
180 static int
bibl_setwriteparams(param * np,param * op)181 bibl_setwriteparams( param *np, param *op )
182 {
183 	int status;
184 	status = bibl_duplicateparams( np, op );
185 	if ( status == BIBL_OK ) {
186 		np->xmlin         = 0;
187 		np->latexin       = 0;
188 		np->utf8in        = 1;
189 		np->charsetin     = BIBL_CHARSET_UNICODE;
190 		np->charsetin_src = BIBL_SRC_DEFAULT;
191 		np->readformat    = BIBL_INTERNALIN;
192 	}
193 	return status;
194 }
195 
196 void
bibl_freeparams(param * p)197 bibl_freeparams( param *p )
198 {
199 	if ( p ) {
200 		slist_free( &(p->asis) );
201 		slist_free( &(p->corps) );
202 		if ( p->progname ) free( p->progname );
203 	}
204 }
205 
206 int
bibl_readasis(param * p,char * f)207 bibl_readasis( param *p, char *f )
208 {
209 	int status;
210 
211 	if ( !p ) return BIBL_ERR_BADINPUT;
212 	if ( !f ) return BIBL_ERR_BADINPUT;
213 
214 	status = slist_fill( &(p->asis), f, 1 );
215 
216 	if ( status == SLIST_ERR_CANTOPEN ) return BIBL_ERR_CANTOPEN;
217 	else if ( status == SLIST_ERR_MEMERR ) return BIBL_ERR_MEMERR;
218 	return BIBL_OK;
219 }
220 
221 int
bibl_readcorps(param * p,char * f)222 bibl_readcorps( param *p, char *f )
223 {
224 	int status;
225 
226 	if ( !p ) return BIBL_ERR_BADINPUT;
227 	if ( !f ) return BIBL_ERR_BADINPUT;
228 
229 	status = slist_fill( &(p->corps), f, 1 );
230 
231 	if ( status == SLIST_ERR_CANTOPEN ) return BIBL_ERR_CANTOPEN;
232 	else if ( status == 0 ) return BIBL_ERR_MEMERR;
233 	return BIBL_OK;
234 }
235 
236 /* bibl_addtoasis()
237  *
238  * Returns BIBL_OK or BIBL_ERR_MEMERR
239  */
240 int
bibl_addtoasis(param * p,char * d)241 bibl_addtoasis( param *p, char *d )
242 {
243 	int status;
244 
245 	if ( !p ) return BIBL_ERR_BADINPUT;
246 	if ( !d ) return BIBL_ERR_BADINPUT;
247 
248 	status = slist_addc( &(p->asis), d );
249 
250 	return ( status==SLIST_OK )? BIBL_OK : BIBL_ERR_MEMERR;
251 }
252 
253 /* bibl_addtocorps()
254  *
255  * Returns BIBL_OK or BIBL_ERR_MEMERR
256  */
257 int
bibl_addtocorps(param * p,char * d)258 bibl_addtocorps( param *p, char *d )
259 {
260 	int status;
261 
262 	if ( !p ) return BIBL_ERR_BADINPUT;
263 	if ( !d ) return BIBL_ERR_BADINPUT;
264 
265 	status = slist_addc( &(p->corps), d );
266 
267 	return ( status==SLIST_OK )? BIBL_OK : BIBL_ERR_MEMERR;
268 }
269 
270 void
bibl_reporterr(int err)271 bibl_reporterr( int err )
272 {
273 	REprintf( "Bibutils: " );
274 	switch( err ) {
275 		case BIBL_OK:
276 			REprintf( "No error." ); break;
277 		case BIBL_ERR_BADINPUT:
278 			REprintf( "Bad input." ); break;
279 		case BIBL_ERR_MEMERR:
280 			REprintf( "Memory error." ); break;
281 		case BIBL_ERR_CANTOPEN:
282 			REprintf( "Can't open." ); break;
283 		default:
284 			REprintf( "Cannot identify error code %d.", err ); break;
285 	}
286 	REprintf( "\n" );
287 }
288 
289 static int
bibl_illegalinmode(int mode)290 bibl_illegalinmode( int mode )
291 {
292 	if ( mode < BIBL_FIRSTIN || mode > BIBL_LASTIN ) return 1;
293 	else return 0;
294 }
295 
296 static int
bibl_illegaloutmode(int mode)297 bibl_illegaloutmode( int mode )
298 {
299 	if ( mode < BIBL_FIRSTOUT || mode > BIBL_LASTOUT ) return 1;
300 	else return 0;
301 }
302 
303 static void
bibl_verbose_reference(fields * f,char * filename,long refnum)304 bibl_verbose_reference( fields *f, char *filename, long refnum )
305 {
306 	int i, n;
307 	n = fields_num( f );
308 	REprintf( "======== %s %ld : converted\n", filename, refnum );
309 	for ( i=0; i<n; ++i ) {
310 	  // REprintf( "'%s'='%s' level=%d\n",
311 	  // 		(char*) fields_tag( f, i, FIELDS_CHRP_NOUSE ),
312 	  // 		(char*) fields_value( f, i, FIELDS_CHRP_NOUSE ),
313 	  // 		fields_level( f, i ) );
314 	  // Georgi
315 	  REprintf( "'%s'='%s' level=%d; ",
316 			(char*) fields_tag( f, i, FIELDS_CHRP_NOUSE ),
317 			(char*) fields_value( f, i, FIELDS_CHRP_NOUSE ),
318 			fields_level( f, i ) );
319 	  REprintf( "    \n" );
320 	  unsigned char *val = (unsigned char*) fields_value( f, i, FIELDS_CHRP_NOUSE );
321 	  int len = strlen((const char *)val);
322 	  for(int j = 0; j < len ; j++){
323 	    REprintf(" %x", val[j]);
324 	  }
325 	  REprintf( "\n" );
326 
327 	}
328 	REprintf( "\n" );
329 }
330 
331 static void
bibl_verbose(bibl * bin,const char * msg1,const char * msg2)332 bibl_verbose( bibl *bin, const char *msg1, const char *msg2 )
333 {
334 	long i;
335 	// fflush( stdout );
336 	REprintf( "-------------------%s begin %s\n", msg1, msg2);
337 	for ( i=0; i<bin->n; ++i )
338 		bibl_verbose_reference( bin->ref[i], "", i+1 );
339 	REprintf( "-------------------%s end %s\n", msg1, msg2);
340 	// fflush( stderr );
341 }
342 
343 
344 /* extract_tag_value
345  *
346  * Extract the tag and the value for ALWAYS/DEFAULT
347  * entries like: "GENRE:BIBUTILS|Masters thesis"
348  *
349  * tag = "GENRE:BIBUTILS"
350  * value = "Masters thesis"
351  */
352 static int
extract_tag_value(str * tag,str * value,char * p)353 extract_tag_value( str *tag, str *value, char *p )
354 {
355 	str_empty( tag );
356 	while ( p && *p && *p!='|' ) {
357 		str_addchar( tag, *p );
358 		p++;
359 	}
360 	if ( str_memerr( tag ) ) return BIBL_ERR_MEMERR;
361 
362 	if ( p && *p=='|' ) p++;
363 
364 	str_empty( value );
365 	while ( p && *p ) {
366 		str_addchar( value, *p );
367 		p++;
368 	}
369 	if ( str_memerr( tag ) ) return BIBL_ERR_MEMERR;
370 
371 	return BIBL_OK;
372 }
373 
374 /* process_defaultadd()
375  *
376  * Add tag/value pairs that have "DEFAULT" processing
377  * unless a tag/value pair with the same tag has already
378  * been adding during reference processing.
379  */
380 static int
process_defaultadd(fields * f,int reftype,param * r)381 process_defaultadd( fields *f, int reftype, param *r )
382 {
383 	int i, n, process, level, status, ret = BIBL_OK;
384 	str tag, value;
385 	char *p;
386 
387 	strs_init( &tag, &value, NULL );
388 
389 	for ( i=0; i<r->all[reftype].ntags; ++i ) {
390 
391 		process = ((r->all[reftype]).tags[i]).processingtype;
392 		if ( process!=DEFAULT ) continue;
393 
394 		level   = ((r->all[reftype]).tags[i]).level;
395 		p       = ((r->all[reftype]).tags[i]).newstr;
396 
397 		status = extract_tag_value( &tag, &value, p );
398 		if ( status!=BIBL_OK ) {
399 			ret = status;
400 			goto out;
401 		}
402 
403 		n = fields_find( f, tag.data, level );
404 		if ( n==FIELDS_NOTFOUND ) {
405 			status = fields_add( f, tag.data, value.data, level );
406 			if ( status!=FIELDS_OK ) {
407 				ret = BIBL_ERR_MEMERR;
408 				goto out;
409 			}
410 		}
411 
412 	}
413 out:
414 	strs_free( &tag, &value, NULL );
415 
416 	return ret;
417 }
418 
419 /* process_alwaysadd()
420  *
421  * Add tag/value pair to reference from the ALWAYS
422  * processing type without exception (the difference from
423  * DEFAULT processing).
424  */
425 static int
process_alwaysadd(fields * f,int reftype,param * r)426 process_alwaysadd( fields *f, int reftype, param *r )
427 {
428 	int i, process, level, status, ret = BIBL_OK;
429 	str tag, value;
430 	char *p;
431 
432 	strs_init( &tag, &value, NULL );
433 
434 	for ( i=0; i<r->all[reftype].ntags; ++i ) {
435 
436 		process = ((r->all[reftype]).tags[i]).processingtype;
437 		if ( process!=ALWAYS ) continue;
438 
439 		level   = ((r->all[reftype]).tags[i]).level;
440 		p       = ((r->all[reftype]).tags[i]).newstr;
441 
442 		status = extract_tag_value( &tag, &value, p );
443 		if ( status!=BIBL_OK ) {
444 			ret = status;
445 			goto out;
446 		}
447 
448 		status = fields_add( f, tag.data, value.data, level );
449 		if ( status!=FIELDS_OK ) {
450 			ret = BIBL_ERR_MEMERR;
451 			goto out;
452 		}
453 	}
454 
455 out:
456 	strs_free( &tag, &value, NULL );
457 
458 	return ret;
459 }
460 
461 static int
read_refs(FILE * fp,bibl * bin,char * filename,param * p)462 read_refs( FILE *fp, bibl *bin, char *filename, param *p )
463 {
464 	int refnum = 0, bufpos = 0, ret=BIBL_OK, fcharset;/* = CHARSET_UNKNOWN;*/
465 	str reference, line;
466 	char buf[256]="";
467 	fields *ref;
468 
469 	str_init( &reference );
470 	str_init( &line );
471 	while ( p->readf( fp, buf, sizeof(buf), &bufpos, &line, &reference, &fcharset ) ) {
472 		if ( reference.len==0 ) continue;
473 		ref = fields_new();
474 		if ( !ref ) {
475 			ret = BIBL_ERR_MEMERR;
476 			bibl_free( bin );
477 			goto out;
478 		}
479 		if ( p->processf( ref, reference.data, filename, refnum+1, p )){
480 		  			ret = bibl_addref( bin, ref );
481 
482 					// fields_report_stderr(ref); // Georgi, for tests
483 
484 			if ( ret!=BIBL_OK ) {
485 				bibl_free( bin );
486 				fields_delete( ref );
487 				goto out;
488 			}
489 			refnum += 1;
490 		} else {
491 			fields_delete( ref );
492 		}
493 		str_empty( &reference );
494 		if ( fcharset!=CHARSET_UNKNOWN ) {
495 			/* charset from file takes priority over default, but
496 			 * not user-specified */
497 			if ( p->charsetin_src!=BIBL_SRC_USER ) {
498 				p->charsetin_src = BIBL_SRC_FILE;
499 				p->charsetin = fcharset;
500 				if ( fcharset!=CHARSET_UNICODE ) p->utf8in = 0;
501 			}
502 		}
503 	}
504 	if ( p->charsetin==CHARSET_UNICODE ) p->utf8in = 1;
505 out:
506 	str_free( &line );
507 	str_free( &reference );
508 	return ret;
509 }
510 
/* bibl_notexify()
 *
 * Don't manipulate latex for URL's and the like.
 *
 * Returns 1 if tag (compared case-insensitively) is one of the tags
 * whose value must bypass LaTeX conversion, 0 otherwise.
 */
static int
bibl_notexify( char *tag )
{
	static const char * const verbatim[] = {
		"DOI", "URL", "REFNUM", "FILEATTACH", "FILE"
	};
	const int nverbatim = (int)( sizeof( verbatim ) / sizeof( verbatim[0] ) );
	int k = 0;

	while ( k < nverbatim ) {
		if ( strcasecmp( tag, verbatim[k] ) == 0 ) return 1;
		k++;
	}

	return 0;
}
521 
522 /* bibl_fixcharsetdata()
523  *
524  * returns BIBL_OK or BIBL_ERR_MEMERR
525  */
526 static int
bibl_fixcharsetdata(fields * ref,param * p)527 bibl_fixcharsetdata( fields *ref, param *p )
528 {
529 	str *data;
530 	char *tag;
531 	long i, n;
532 	int ok;
533 
534 	n = fields_num( ref );
535 
536 	for ( i=0; i<n; ++i ) {
537 
538 		tag  = fields_tag( ref, i, FIELDS_CHRP_NOUSE );
539 		data = fields_value( ref, i, FIELDS_STRP_NOUSE );
540 
541 		// Georgi:
542 		// REprintf("p->latexin: %d, p->charsetin: %d\n", p->latexin, p->charsetin );
543 		// REprintf("p->latexout: %d, p->charsetout: %d\n", p->latexout, p->charsetout );
544 
545 		if ( bibl_notexify( tag ) ) {
546 			ok = str_convert( data,
547 				p->charsetin,  0, p->utf8in,  p->xmlin,
548 				p->charsetout, 0, p->utf8out, p->xmlout );
549 		} else {
550 			ok = str_convert( data,
551 				p->charsetin,  p->latexin,  p->utf8in,  p->xmlin,
552 				p->charsetout, p->latexout, p->utf8out, p->xmlout );
553 		}
554 
555 		if ( !ok ) return BIBL_ERR_MEMERR;
556 	}
557 
558 	return BIBL_OK;
559 }
560 
561 /* bibl_fixcharsets()
562  *
563  * returns BIBL_OK or BIBL_ERR_MEMERR
564  */
565 static int
bibl_fixcharsets(bibl * b,param * p)566 bibl_fixcharsets( bibl *b, param *p )
567 {
568 	int status;
569 	long i;
570 
571 	for ( i=0; i<b->n; ++i ) {
572 		status = bibl_fixcharsetdata( b->ref[i], p );
573 		if ( status!=BIBL_OK ) return status;
574 	}
575 
576 	return BIBL_OK;
577 }
578 
579 static int
bibl_addcount(bibl * b)580 bibl_addcount( bibl *b )
581 {
582 	char buf[512];
583 	fields *ref;
584 	long i;
585 	int n;
586 
587 	for ( i=0; i<b->n; ++i ) {
588 
589 		ref = b->ref[i];
590 
591 		n = fields_find( ref, "REFNUM", LEVEL_MAIN );
592 		if ( n==FIELDS_NOTFOUND ) continue;
593 
594 		sprintf( buf, "_%ld", i+1 );
595 		str_strcatc( fields_value( ref, n, FIELDS_STRP_NOUSE ), buf );
596 		if ( str_memerr( fields_value( ref, n, FIELDS_STRP_NOUSE ) ) ) {
597 			return BIBL_ERR_MEMERR;
598 		}
599 
600 	}
601 
602 	return BIBL_OK;
603 }
604 
605 static int
generate_citekey(fields * f,long nref)606 generate_citekey( fields *f, long nref )
607 {
608 	int n1, n2, status, ret;
609 	char *p, buf[100];
610 	str citekey;
611 
612 	str_init( &citekey );
613 
614 	n1 = fields_find( f, "AUTHOR", LEVEL_MAIN );
615 	if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:ASIS", LEVEL_MAIN );
616 	if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:CORP", LEVEL_MAIN );
617 	if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR", LEVEL_ANY );
618 	if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:ASIS", LEVEL_ANY );
619 	if ( n1==FIELDS_NOTFOUND ) n1 = fields_find( f, "AUTHOR:CORP", LEVEL_ANY );
620 
621 	n2 = fields_find( f, "DATE:YEAR", LEVEL_MAIN );
622 	if ( n2==FIELDS_NOTFOUND ) n2 = fields_find( f, "DATE:YEAR", LEVEL_ANY );
623 	if ( n2==FIELDS_NOTFOUND ) n2 = fields_find( f, "PARTDATE:YEAR", LEVEL_MAIN );
624 	if ( n2==FIELDS_NOTFOUND ) n2 = fields_find( f, "PARTDATE:YEAR", LEVEL_ANY );
625 
626 	if ( n1!=FIELDS_NOTFOUND && n2!=FIELDS_NOTFOUND ) {
627 
628 		p = fields_value( f, n1, FIELDS_CHRP_NOUSE );
629 		while ( p && *p && *p!='|' ) {
630 			if ( !is_ws( *p ) ) str_addchar( &citekey, *p );
631 			p++;
632 		}
633 
634 		p = fields_value( f, n2, FIELDS_CHRP_NOUSE );
635 		while ( p && *p ) {
636 			if ( !is_ws( *p ) ) str_addchar( &citekey, *p );
637 			p++;
638 		}
639 
640 	}
641 
642 	else {
643 		sprintf( buf, "ref%ld", nref );
644 		str_strcpyc( &citekey, buf );
645 	}
646 
647 	if ( str_memerr( &citekey ) ) {
648 		ret = -1;
649 		goto out;
650 	}
651 
652 	status = fields_add( f, "REFNUM", str_cstr( &citekey ), LEVEL_MAIN );
653 	if ( status!=FIELDS_OK ) {
654 		ret = -1;
655 		goto out;
656 	}
657 
658 	ret = fields_find( f, "REFNUM", LEVEL_MAIN );
659 out:
660 	str_free( &citekey );
661 	return ret;
662 }
663 
664 static int
get_citekeys(bibl * bin,slist * citekeys)665 get_citekeys( bibl *bin, slist *citekeys )
666 {
667 	int n, status;
668 	fields *f;
669 	long i;
670 
671 	for ( i=0; i<bin->n; ++i ) {
672 		f = bin->ref[i];
673 		n = fields_find( f, "REFNUM", LEVEL_ANY );
674 		if ( n==FIELDS_NOTFOUND ) n = generate_citekey( f, i+1 );
675 		if ( n!=FIELDS_NOTFOUND && fields_has_value( f, n ) ) {
676 			status = slist_add( citekeys, fields_value( f, n, FIELDS_STRP_NOUSE ) );
677 			if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
678 		} else {
679 			status = slist_addc( citekeys, "" );
680 			if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
681 		}
682 	}
683 
684 	return BIBL_OK;
685 }
686 
687 static int
identify_duplicates(bibl * b,slist * citekeys,int * dup)688 identify_duplicates( bibl *b, slist *citekeys, int *dup )
689 {
690 	int i, j, ndup = 0;
691 
692 	for ( i=0; i<citekeys->n-1; ++i ) {
693 		if ( dup[i]!=-1 ) continue;
694 		for ( j=i+1; j<citekeys->n; ++j ) {
695 			if ( !strcmp( slist_cstr( citekeys, i ),
696 			              slist_cstr( citekeys, j ) ) ) {
697 					dup[i] = i;
698 					dup[j] = i;
699 					ndup++;
700 			}
701 		}
702 	}
703 
704 	return ndup;
705 }
706 
707 static int
build_new_citekey(int nsame,str * old_citekey,str * new_citekey)708 build_new_citekey( int nsame, str *old_citekey, str *new_citekey )
709 {
710 	const char abc[]="abcdefghijklmnopqrstuvwxyz";
711 
712 	str_strcpy( new_citekey, old_citekey );
713 
714 	while ( nsame >= 26 ) {
715 		str_addchar( new_citekey, 'a' );
716 		nsame -= 26;
717 	}
718 
719 	if ( nsame>=0 ) str_addchar( new_citekey, abc[nsame] );
720 
721 	return ( str_memerr( new_citekey ) ) ? BIBL_ERR_MEMERR : BIBL_OK;
722 }
723 
724 static int
resolve_duplicates(bibl * b,slist * citekeys,int * dup)725 resolve_duplicates( bibl *b, slist *citekeys, int *dup )
726 {
727 	int nsame, n, i, j, status = BIBL_OK;
728 	str new_citekey, *ref_citekey;
729 
730 	str_init( &new_citekey );
731 
732 	for ( i=0; i<citekeys->n; ++i ) {
733 
734 		if ( dup[i]==-1 ) continue;
735 
736 		nsame = 0;
737 
738 		for ( j=i; j<citekeys->n; ++j ) {
739 
740 			if ( dup[j]!=i ) continue;
741 
742 			dup[j] = -1;
743 
744 			status = build_new_citekey( nsame, slist_str( citekeys, j ), &new_citekey );
745 			if ( status!=BIBL_OK ) goto out;
746 
747 			n = fields_find( b->ref[j], "REFNUM", LEVEL_ANY );
748 			if ( n==FIELDS_NOTFOUND ) continue;
749 
750 			ref_citekey = fields_value( b->ref[j], n, FIELDS_STRP_NOUSE );
751 
752 			str_strcpy( ref_citekey, &new_citekey );
753 			if ( str_memerr( ref_citekey ) ) { status = BIBL_ERR_MEMERR; goto out; }
754 
755 			nsame++;
756 		}
757 	}
758 out:
759 	str_free( &new_citekey );
760 	return status;
761 }
762 
763 static int
identify_and_resolve_duplicate_citekeys(bibl * b,slist * citekeys)764 identify_and_resolve_duplicate_citekeys( bibl *b, slist *citekeys )
765 {
766 	int i, *dup, ndup, status=BIBL_OK;
767 
768 	dup = ( int * ) malloc( sizeof( int ) * citekeys->n );
769 	if ( !dup ) return BIBL_ERR_MEMERR;
770 	for ( i=0; i<citekeys->n; ++i ) dup[i] = -1;
771 
772 	ndup = identify_duplicates( b, citekeys, dup );
773 
774 	if ( ndup ) status = resolve_duplicates( b, citekeys, dup );
775 
776 	free( dup );
777 	return status;
778 }
779 
780 static int
uniqueify_citekeys(bibl * bin)781 uniqueify_citekeys( bibl *bin )
782 {
783 	slist citekeys;
784 	int status;
785 
786 	slist_init( &citekeys );
787 
788 	status = get_citekeys( bin, &citekeys );
789 	if ( status!=BIBL_OK ) goto out;
790 
791 	status = identify_and_resolve_duplicate_citekeys( bin, &citekeys );
792 out:
793 	slist_free( &citekeys );
794 	return status;
795 }
796 
797 static int
clean_refs(bibl * bin,param * p)798 clean_refs( bibl *bin, param *p )
799 {
800 	if ( p->cleanf ) return p->cleanf( bin, p );
801 	else return BIBL_OK;
802 }
803 
804 static int
convert_refs(bibl * bin,char * fname,bibl * bout,param * p)805 convert_refs( bibl *bin, char *fname, bibl *bout, param *p )
806 {
807 	int reftype = 0, status;
808 	fields *rin, *rout;
809 	long i;
810 
811 	// REprintf("convert_refs: in convert_refs!\n");
812 
813 	// REprintf("convert_refs: bib->n = %d\n", bin->n);
814 	for ( i=0; i<bin->n; ++i ) {
815 	        // REprintf("convert_refs: i = %d\n", i);
816 		rin = bin->ref[i];
817 
818 		// fields_report_stderr( rin );  // Testing only !!!!!!!!!!!!!!!1
819 
820 		rout = fields_new();
821 		if ( !rout ) return BIBL_ERR_MEMERR;
822 
823 		if ( p->typef ) reftype = p->typef( rin, fname, i+1, p );
824 
825 		// REprintf("convert_refs: before p->convertf\n");
826 		status = p->convertf( rin, rout, reftype, p );
827 		// REprintf("convert_refs: after p->convertf\n");
828 		if ( status!=BIBL_OK ) return status;
829 
830 		if ( p->all ) {
831 			status = process_alwaysadd( rout, reftype, p );
832 			if ( status!=BIBL_OK ) return status;
833 			status = process_defaultadd( rout, reftype, p );
834 			if ( status!=BIBL_OK ) return status;
835 		}
836 
837 		status = bibl_addref( bout, rout );
838 		if ( status!=BIBL_OK ) return status;
839 	}
840 
841 	// REprintf("convert_refs: end of convert_refs!\n");
842 	return BIBL_OK;
843 }
844 
845 int
bibl_read(bibl * b,FILE * fp,char * filename,param * p)846 bibl_read( bibl *b, FILE *fp, char *filename, param *p )
847 {
848 	int status = BIBL_OK;
849 	param read_params;
850 	bibl bin;
851 	// REprintf("(bibl_read) in bibl_read!\n");
852 
853 	if ( !b )  return BIBL_ERR_BADINPUT;
854 	if ( !fp ) return BIBL_ERR_BADINPUT;
855 	if ( !p )  return BIBL_ERR_BADINPUT;
856 
857 	if ( bibl_illegalinmode( p->readformat ) ) {
858 	  if ( debug_set( p ) ) report_params( "bibl_read", p );
859 		return BIBL_ERR_BADINPUT;
860 	}
861 
862 	// REprintf("(bibl_read) after bibl_illegalinmode\n");
863 
864 	status = bibl_setreadparams( &read_params, p );
865 
866 	// REprintf("(bibl_read) after bibl_setreadparams\n");
867 
868 	if ( status!=BIBL_OK ) {
869 	  if ( debug_set( p ) ) report_params( "bibl_read", p );
870 		return status;
871 	}
872 
873 	if ( debug_set( &read_params ) ) {
874 	  report_params( "bibl_read", &read_params );
875 	}
876 
877 	bibl_init( &bin );
878 
879 
880 	// REprintf("(bibl_read) before read_refs\n");
881 
882 	status = read_refs( fp, &bin, filename, &read_params );
883 	if ( status!=BIBL_OK ) {
884 	  if ( debug_set( &read_params ) ) report_params( "bibl_read", &read_params );
885 		bibl_freeparams( &read_params );
886 		return status;
887 	}
888 
889 	// // Georgi: for testing
890 	// REprintf("bibl_read: (after(read_refs)\n");
891 	// for(long i = 0; i < bin.n; ++i) {
892 	//   fields_report_stderr( bin.ref[i] );
893 	// }
894 
895 	if ( debug_set( &read_params ) ) {
896 		bibl_verbose( &bin, "raw_input", "for bibl_read" );
897 	}
898 
899 	if ( !read_params.output_raw || ( read_params.output_raw & BIBL_RAW_WITHCLEAN )) {
900 		status = clean_refs( &bin, &read_params );
901 		if ( status!=BIBL_OK ) goto out;
902 		if ( debug_set( &read_params ) ) bibl_verbose( &bin, "post_clean_refs", "for bibl_read" );
903 	}
904 
905 	// // Georgi: for testing
906 	// REprintf("bibl_read: (after(clean_refs)\n");
907 	// for(long i = 0; i < bin.n; ++i) {
908 	//   fields_report_stderr( bin.ref[i] );
909 	// }
910 
911 	if ( ( !read_params.output_raw ) || ( read_params.output_raw & BIBL_RAW_WITHCHARCONVERT ) ) {
912 	  	status = bibl_fixcharsets( &bin, &read_params );
913 		if ( status!=BIBL_OK ) goto out;
914 		if ( debug_set( &read_params ) ) bibl_verbose( &bin, "post_fixcharsets", "for bibl_read" );
915 	}
916 
917 	// REprintf("bibl_read: (after(bibl_fixcharsets)\n");
918 	// Georgi: for testing
919 	// for(long i = 0; i < bin.n; ++i) {
920 	//   fields_report_stderr( bin.ref[i] );
921 	// }
922 
923 	if ( !read_params.output_raw ) {
924 	        // REprintf("bibl_read: before convert_refs; read_params.output_raw is FALSE\n");
925 		status = convert_refs( &bin, filename, b, &read_params );
926 		if ( status!=BIBL_OK ) goto out;
927 		if ( debug_set( &read_params ) ) bibl_verbose( b, "post_convert_refs", "for bibl_read" );
928 	}
929 
930 	else {
931 	  // REprintf("bibl_read: before convert_refs; read_params.output_raw is TRUE\n");
932 	 	status = bibl_copy( b, &bin );
933 	 	if ( status!=BIBL_OK ) goto out;
934 	 	if ( debug_set( &read_params ) ) bibl_verbose( b, "post_bibl_copy", "for bibl_read" );
935 	}
936 
937 
938 	// REprintf("bibl_read: before 'if' and uniquify_citekeys\n");
939 
940 	if ( ( !read_params.output_raw ) || ( read_params.output_raw & BIBL_RAW_WITHMAKEREFID ) ) {
941 	        // REprintf("bibl_read: before uniquify_citekeys\n");
942 		status = uniqueify_citekeys( b );
943 		if ( status!=BIBL_OK ) goto out;
944 		if ( read_params.addcount ) {
945 			status = bibl_addcount( b );
946 			if ( status!=BIBL_OK ) goto out;
947 		}
948 		if ( debug_set( &read_params ) ) bibl_verbose( &bin, "post_uniqueify_citekeys", "for bibl_read" );
949 	}
950 
951 	// // Georgi: for testing
952 	// REprintf("\nbibl_read: at end of bibl_read\n");
953 	// for(long i = 0; i < b->n; ++i) {
954 	//   fields_report_stderr( b->ref[i] );
955 	// }
956 
957 out:
958 	bibl_free( &bin );
959 	bibl_freeparams( &read_params );
960 
961 	return status;
962 }
963 
964 static FILE *
singlerefname(fields * reffields,long nref,int mode)965 singlerefname( fields *reffields, long nref, int mode )
966 {
967 	char outfile[2048];
968 	char suffix[5] = "xml";
969 	FILE *fp;
970 	long count;
971 	int  found;
972 	if      ( mode==BIBL_ADSABSOUT )     strcpy( suffix, "ads" );
973 	else if ( mode==BIBL_BIBTEXOUT )     strcpy( suffix, "bib" );
974 	else if ( mode==BIBL_ENDNOTEOUT )    strcpy( suffix, "end" );
975 	else if ( mode==BIBL_ISIOUT )        strcpy( suffix, "isi" );
976 	else if ( mode==BIBL_MODSOUT )       strcpy( suffix, "xml" );
977 	else if ( mode==BIBL_RISOUT )        strcpy( suffix, "ris" );
978 	else if ( mode==BIBL_WORD2007OUT )   strcpy( suffix, "xml" );
979 	found = fields_find( reffields, "REFNUM", LEVEL_MAIN );
980 	/* find new filename based on reference */
981 	if ( found!=-1 ) {
982 		sprintf( outfile,"%s.%s",(char*)fields_value(reffields,found,FIELDS_CHRP_NOUSE), suffix );
983 	} else  sprintf( outfile,"%ld.%s",nref, suffix );
984 	count = 0;
985 	fp = fopen( outfile, "r" );
986 	while ( fp ) {
987 		fclose(fp);
988 		count++;
989 		if ( count==60000 ) return NULL;
990 		if ( found!=-1 )
991 			sprintf( outfile, "%s_%ld.%s", (char*)fields_value( reffields, found, FIELDS_CHRP_NOUSE ), count, suffix );
992 		else sprintf( outfile,"%ld_%ld.%s", nref, count, suffix );
993 		fp = fopen( outfile, "r" );
994 	}
995 	return fopen( outfile, "w" );
996 }
997 
998 static int
bibl_writeeachfp(FILE * fp,bibl * b,param * p)999 bibl_writeeachfp( FILE *fp, bibl *b, param *p )
1000 {
1001 	fields out, *use = &out;
1002 	int status;
1003 	long i;
1004 
1005 	fields_init( &out );
1006 
1007 	for ( i=0; i<b->n; ++i ) {
1008 
1009 		fp = singlerefname( b->ref[i], i, p->writeformat );
1010 		if ( !fp ) return BIBL_ERR_CANTOPEN;
1011 
1012 		if ( p->headerf ) p->headerf( fp, p );
1013 
1014 		if ( p->assemblef ) {
1015 			fields_free( &out );
1016 			status = p->assemblef( b->ref[i], &out, p, i );
1017 			if ( status!=BIBL_OK ) break;
1018 		} else {
1019 			use = b->ref[i];
1020 		}
1021 
1022 		status = p->writef( use, fp, p, i );
1023 
1024 		if ( p->footerf ) p->footerf( fp );
1025 		fclose( fp );
1026 
1027 		if ( status!=BIBL_OK ) return status;
1028 	}
1029 
1030 	return BIBL_OK;
1031 }
1032 
1033 static int
bibl_writefp(FILE * fp,bibl * b,param * p)1034 bibl_writefp( FILE *fp, bibl *b, param *p )
1035 {
1036 	int status = BIBL_OK;
1037 	fields out, *use = &out;
1038 	long i;
1039 
1040 	fields_init( &out );
1041 
1042 	if ( debug_set( p ) && p->assemblef ) {
1043 	 	REprintf( "-------------------assemblef start for bibl_write\n");
1044 	}
1045 
1046 	if ( p->headerf ) p->headerf( fp, p );
1047 	for ( i=0; i<b->n; ++i ) {
1048 		if ( p->assemblef ) {
1049 			fields_free( &out );
1050 			// Georgi TODO: it seems that xml2nbib crashes here:
1051 			status = p->assemblef( b->ref[i], &out, p, i );
1052 			if ( status!=BIBL_OK ) break;
1053 			if ( debug_set( p ) ) bibl_verbose_reference( &out, "", i+1 );
1054 		} else {
1055 			use = b->ref[i];
1056 		}
1057 
1058 		status = p->writef( use, fp, p, i );
1059 		if ( status!=BIBL_OK ) break;
1060 
1061 	}
1062 
1063 	if ( debug_set( p ) && p->assemblef ) {
1064 	 	REprintf( "-------------------assemblef end for bibl_write\n");
1065 	}
1066 
1067 	if ( p->footerf ) p->footerf( fp );
1068 
1069 	// Georgi: the above loop doesn't free the last reference
1070 	//         (fields_free is safe even if it is just initialised, which is the case here
1071 	fields_free( &out );
1072 
1073 	return status;
1074 }
1075 
1076 int
bibl_write(bibl * b,FILE * fp,param * p)1077 bibl_write( bibl *b, FILE *fp, param *p )
1078 {
1079 	int status;
1080 	param lp;
1081 
1082 	if ( !b ) return BIBL_ERR_BADINPUT;
1083 	if ( !p ) return BIBL_ERR_BADINPUT;
1084 	if ( bibl_illegaloutmode( p->writeformat ) ) return BIBL_ERR_BADINPUT;
1085 	if ( !fp && !p->singlerefperfile ) return BIBL_ERR_BADINPUT;
1086 
1087 	status = bibl_setwriteparams( &lp, p );
1088 	if ( status!=BIBL_OK ) return status;
1089 
1090 	if ( debug_set( p ) ) {
1091 	  report_params( "bibl_write", &lp );
1092 	  // fflush( fp ); // fflush( stdout );
1093 	}
1094 
1095 	if ( debug_set( p ) ) bibl_verbose( b, "raw_input", "for bibl_write" );
1096 
1097 	status = bibl_fixcharsets( b, &lp );
1098 
1099 	if ( status!=BIBL_OK ) goto out;
1100 
1101 	if ( debug_set( p ) ) bibl_verbose( b, "post-fixcharsets", "for bibl_write" );
1102 
1103 	if ( p->singlerefperfile ) status = bibl_writeeachfp( fp, b, &lp );
1104 	else status = bibl_writefp( fp, b, &lp );
1105 
1106 out:
1107 	bibl_freeparams( &lp );
1108 	return status;
1109 }
1110 
1111 
1112 
1113 
1114 
1115