1 /*
2  * isiin.c
3  *
4  * Copyright (c) Chris Putnam 2004-2020
5  * Copyright (c) Georgi N. Boshnakov 2020
6  *
7  * Program and source code released under the GPL version 2
8  *
9  */
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <ctype.h>
14 #include "is_ws.h"
15 #include "str.h"
16 #include "str_conv.h"
17 #include "name.h"
18 #include "fields.h"
19 #include "reftypes.h"
20 #include "bibformats.h"
21 #include "generic.h"
22 
/* ISI tag translation tables and their count; both are defined outside this file. */
extern variants isi_all[];
extern int isi_nall;

/* Reader callbacks; isiin_initparams() installs them into the param structure. */
static int isiin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
static int isiin_typef( fields *isiin, const char *filename, int nref, param *p );
static int isiin_convertf( fields *isiin, fields *info, int reftype, param *p );
static int isiin_processf( fields *isiin, const char *p, const char *filename, long nref, param *pm );
30 
31 
32 /*****************************************************
 PUBLIC: int isiin_initparams(), returns BIBL_OK or BIBL_ERR_MEMERR
34 *****************************************************/
35 int
isiin_initparams(param * pm,const char * progname)36 isiin_initparams( param *pm, const char *progname )
37 {
38 	pm->readformat       = BIBL_ISIIN;
39 	pm->charsetin        = BIBL_CHARSET_DEFAULT;
40 	pm->charsetin_src    = BIBL_SRC_DEFAULT;
41 	pm->latexin          = 0;
42 	pm->xmlin            = 0;
43 	pm->utf8in           = 0;
44 	pm->nosplittitle     = 0;
45 	pm->verbose          = 0;
46 	pm->addcount         = 0;
47 	pm->output_raw       = 0;
48 
49 	pm->readf    = isiin_readf;
50 	pm->processf = isiin_processf;
51 	pm->cleanf   = NULL;
52 	pm->typef    = isiin_typef;
53 	pm->convertf = isiin_convertf;
54 	pm->all      = isi_all;
55 	pm->nall     = isi_nall;
56 
57 	slist_init( &(pm->asis) );
58 	slist_init( &(pm->corps) );
59 
60 	if ( !progname ) pm->progname = NULL;
61 	else {
62 		pm->progname = strdup( progname );
63 		if ( !pm->progname ) return BIBL_ERR_MEMERR;
64 	}
65 
66 	return BIBL_OK;
67 }
68 
69 /*****************************************************
70  PUBLIC: int isiin_readf()
71 *****************************************************/
72 
73 /* ISI definition of a tag is strict:
74  *   char 1 = uppercase alphabetic character
75  *   char 2 = uppercase alphabetic character or digit
76  */
77 
/*
 * Return 1 when buf starts with an ISI tag (an uppercase letter followed
 * by an uppercase letter or a digit), 0 otherwise.
 */
static int
is_isi_tag( const char *buf )
{
	unsigned char lead, next;

	lead = (unsigned char) buf[0];
	if ( !isupper( lead ) ) return 0;

	/* the second byte is only inspected once the first one qualified */
	next = (unsigned char) buf[1];
	if ( isupper( next ) || isdigit( next ) ) return 1;

	return 0;
}
85 
86 static int
readmore(FILE * fp,char * buf,int bufsize,int * bufpos,str * line)87 readmore( FILE *fp, char *buf, int bufsize, int *bufpos, str *line )
88 {
89 	if ( line->len ) return 1;
90 	else return str_fget( fp, buf, bufsize, bufpos, line );
91 }
92 
93 static int
isiin_readf(FILE * fp,char * buf,int bufsize,int * bufpos,str * line,str * reference,int * fcharset)94 isiin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
95 {
96 	int haveref = 0, inref = 0;
97 	char *p;
98 
99 	*fcharset = CHARSET_UNKNOWN;
100 
101 	while ( !haveref && readmore( fp, buf, bufsize, bufpos, line ) ) {
102 
103 		if ( str_is_empty( line ) ) continue;
104 
105 		p = str_cstr( line );
106 
107 		/* Recognize UTF8 BOM */
108 		if ( line->len > 2 &&
109 				(unsigned char)(p[0])==0xEF &&
110 				(unsigned char)(p[1])==0xBB &&
111 				(unsigned char)(p[2])==0xBF ) {
112 			*fcharset = CHARSET_UNICODE;
113 			p += 3;
114 		}
115 
116 		/* Each reference ends with 'ER ' */
117 		if ( is_isi_tag( p ) ) {
118 			if ( !strncmp( p, "FN ", 3 ) ) {
119 				if (strncasecmp( p, "FN ISI Export Format",20)){
120 					REprintf( ": warning file FN type not '%s' not recognized.\n", /*r->progname,*/ p );
121 				}
122 			} else if ( !strncmp( p, "VR ", 3 ) ) {
123 				if ( strncasecmp( p, "VR 1.0", 6 ) ) {
124 					REprintf(": warning file version number '%s' not recognized, expected 'VR 1.0'\n", /*r->progname,*/ p );
125 				}
126 			} else if ( !strncmp( p, "ER", 2 ) ) haveref = 1;
127 			else {
128 				str_addchar( reference, '\n' );
129 				str_strcatc( reference, p );
130 				inref = 1;
131 			}
132 			str_empty( line );
133 		}
134 		/* not a tag, but we'll append to the last values */
135 		else if ( inref ) {
136 			str_addchar( reference, '\n' );
137 			str_strcatc( reference, p );
138 			str_empty( line );
139 		}
140 		else {
141 			str_empty( line );
142 		}
143 	}
144 	return haveref;
145 }
146 
147 /*****************************************************
148  PUBLIC: int isiin_processf()
149 *****************************************************/
150 
151 static const char *
process_tagged_line(str * tag,str * value,const char * p)152 process_tagged_line( str *tag, str *value, const char *p )
153 {
154 	int i = 0;
155 
156 	/* collect tag and skip past it */
157 	while ( i<2 && *p && *p!='\r' && *p!='\n') {
158 		str_addchar( tag, *p );
159 		p++;
160 		i++;
161 	}
162 
163 	while ( *p==' ' || *p=='\t' ) p++;
164 
165 	while ( *p && *p!='\r' && *p!='\n' ) {
166 		str_addchar( value, *p );
167 		p++;
168 	}
169 
170 	str_trimendingws( value );
171 
172 	while ( *p=='\r' || *p=='\n' ) p++;
173 
174 	return p;
175 }
176 
177 static const char *
process_untagged_line(str * value,const char * p)178 process_untagged_line( str *value, const char *p )
179 {
180 	while ( *p==' ' || *p=='\t' ) p++;
181 
182 	while ( *p && *p!='\r' && *p!='\n' ) {
183 		str_addchar( value, *p );
184 		p++;
185 	}
186 
187 	str_trimendingws( value );
188 
189 	while ( *p=='\r' || *p=='\n' ) p++;
190 
191 	return p;
192 }
193 
194 static int
add_tag_value(fields * isiin,str * tag,str * value,int * tag_added)195 add_tag_value( fields *isiin, str *tag, str *value, int *tag_added )
196 {
197 	int status;
198 
199 	if ( str_has_value( tag ) && str_has_value( value ) ) {
200 		status = fields_add( isiin, str_cstr( tag ), str_cstr( value ), LEVEL_MAIN );
201 		if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
202 		*tag_added = 1;
203 	}
204 
205 	else {
206 		*tag_added = 0;
207 	}
208 
209 	return BIBL_OK;
210 }
211 
212 static int
merge_tag_value(fields * isiin,str * tag,str * value,int * tag_added)213 merge_tag_value( fields *isiin, str *tag, str *value, int *tag_added )
214 {
215 	int n, status;
216 	str *oldvalue;
217 
218 	if ( str_has_value( value ) ) {
219 
220 		if ( *tag_added==1 ) {
221 
222 			n = fields_num( isiin );
223 			if ( n==0 ) return BIBL_OK;
224 
225 			/* only one AU or AF for list of authors */
226 			if ( !strcmp( str_cstr( tag ), "AU" ) ) {
227 				status = fields_add( isiin, "AU", str_cstr( value ), LEVEL_MAIN );
228 				if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
229 			} else if ( !strcmp( str_cstr( tag ), "AF" ) ) {
230 				status = fields_add( isiin, "AF", str_cstr( value ), LEVEL_MAIN );
231 				if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
232 			}
233 			/* otherwise append multiline data */
234 			else {
235 				oldvalue = fields_value( isiin, n-1, FIELDS_STRP_NOUSE );
236 				str_addchar( oldvalue, ' ' );
237 				str_strcat( oldvalue, value );
238 				if ( str_memerr( oldvalue ) ) return BIBL_ERR_MEMERR;
239 			}
240 		}
241 
242 		else {
243                         status = fields_add( isiin, str_cstr( tag ), str_cstr( value ), LEVEL_MAIN );
244                         if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
245                         *tag_added = 1;
246 		}
247 	}
248 
249 	return BIBL_OK;
250 }
251 
252 static int
isiin_processf(fields * isiin,const char * p,const char * filename,long nref,param * pm)253 isiin_processf( fields *isiin, const char *p, const char *filename, long nref, param *pm )
254 {
255 	int status, tag_added = 0, ret = 1;
256 	str tag, value;
257 
258 	strs_init( &tag, &value, NULL );
259 
260 	while ( *p ) {
261 
262 		/* ...with tag, add */
263 		if ( is_isi_tag( p ) ) {
264 			str_empty( &tag );
265 			str_empty( &value );
266 			p = process_tagged_line( &tag, &value, p );
267 			status = add_tag_value( isiin, &tag, &value, &tag_added );
268 			if ( status!=BIBL_OK ) {
269 				ret = 0;
270 				goto out;
271 			}
272 		}
273 
274 		/* ...untagged, merge -- one AU or AF for list of authors */
275 		else {
276 			str_empty( &value );
277 			p = process_untagged_line( &value, p );
278 			status = merge_tag_value( isiin, &tag, &value, &tag_added );
279 			if ( status!=BIBL_OK ) {
280 				ret = 0;
281 				goto out;
282 			}
283 		}
284 
285 	}
286 out:
287 	strs_free( &value, &tag, NULL );
288 	return ret;
289 }
290 
291 /*****************************************************
292  PUBLIC: int isiin_typef()
293 *****************************************************/
294 static int
isiin_typef(fields * isiin,const char * filename,int nref,param * p)295 isiin_typef( fields *isiin, const char *filename, int nref, param *p )
296 {
297 	int ntypename, nrefname, is_default;
298 	char *refname = "", *typename="";
299 
300 	ntypename = fields_find( isiin, "PT", LEVEL_MAIN );
301 	nrefname  = fields_find( isiin, "UT", LEVEL_MAIN );
302 
303 	if ( nrefname!=FIELDS_NOTFOUND )  refname  = fields_value( isiin, nrefname,  FIELDS_CHRP_NOUSE );
304 	if ( ntypename!=FIELDS_NOTFOUND ) typename = fields_value( isiin, ntypename, FIELDS_CHRP_NOUSE );
305 
306 	return get_reftype( typename, nref, p->progname, p->all, p->nall, refname, &is_default, REFTYPE_CHATTY );
307 }
308 
309 /*****************************************************
310  PUBLIC: int isiin_convertf(), returns BIBL_OK or BIBL_ERR_MEMERR
311 *****************************************************/
312 
313 /* pull off authors first--use AF before AU */
314 static int
isiin_addauthors(fields * isiin,fields * info,int reftype,variants * all,int nall,slist * asis,slist * corps)315 isiin_addauthors( fields *isiin, fields *info, int reftype, variants *all, int nall, slist *asis, slist *corps )
316 {
317 	char *newtag, *authortype, use_af[]="AF", use_au[]="AU";
318 	int level, i, n, has_af=0, has_au=0, nfields, ok;
319 	str *t, *d;
320 
321 	nfields = fields_num( isiin );
322 	for ( i=0; i<nfields && has_af==0; ++i ) {
323 		t = fields_tag( isiin, i, FIELDS_STRP );
324 		if ( !strcasecmp( t->data, "AU" ) ) has_au++;
325 		if ( !strcasecmp( t->data, "AF" ) ) has_af++;
326 	}
327 	if ( has_af ) authortype = use_af;
328 	else if ( has_au ) authortype = use_au;
329 	else return BIBL_OK; /* no authors */
330 
331 	for ( i=0; i<nfields; ++i ) {
332 		t = fields_tag( isiin, i, FIELDS_STRP );
333 		if ( strcasecmp( t->data, authortype ) ) continue;
334 		d = fields_value( isiin, i, FIELDS_STRP );
335 		n = process_findoldtag( authortype, reftype, all, nall );
336 		level = ((all[reftype]).tags[n]).level;
337 		newtag = all[reftype].tags[n].newstr;
338 		ok = name_add( info, newtag, d->data, level, asis, corps );
339 		if ( !ok ) return BIBL_ERR_MEMERR;
340 	}
341 	return BIBL_OK;
342 }
343 
344 static int
isiin_keyword(fields * bibin,int n,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)345 isiin_keyword( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
346 {
347 	const char *p = str_cstr( invalue );
348 	int fstatus, status = BIBL_OK;
349 	str keyword;
350 
351 	str_init( &keyword );
352 	while ( *p ) {
353 		p = str_cpytodelim( &keyword, skip_ws( p ), ";", 1 );
354 		if ( str_memerr( &keyword ) ) { status = BIBL_ERR_MEMERR; goto out; }
355 		if ( str_has_value( &keyword ) ) {
356 			fstatus = fields_add( bibout, outtag, keyword.data, level );
357 			if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
358 		}
359 	}
360 out:
361 	str_free( &keyword );
362 	return status;
363 }
364 
365 static void
isiin_report_notag(param * p,char * tag)366 isiin_report_notag( param *p, char *tag )
367 {
368 	if ( p->verbose && strcmp( tag, "PT" ) ) {
369 		if ( p->progname ) REprintf( "%s: ", p->progname );
370 		REprintf( "Did not identify ISI tag '%s'\n", tag );
371 	}
372 }
373 
374 static int
isiin_convertf(fields * bibin,fields * bibout,int reftype,param * p)375 isiin_convertf( fields *bibin, fields *bibout, int reftype, param *p )
376 {
377 	static int (*convertfns[NUM_REFTYPES])(fields *, int, str *, str *, int, param *, char *, fields *) = {
378 		// [ 0 ... NUM_REFTYPES-1 ] = generic_null,
379 		// [ SIMPLE       ] = generic_simple,
380 		// [ TITLE        ] = generic_title,
381 		// [ PERSON       ] = generic_person,
382 		// [ SERIALNO     ] = generic_serialno,
383 		// [ DATE         ] = generic_simple,
384 		// [ NOTES        ] = generic_notes,
385 		// [ KEYWORD      ] = isiin_keyword,
386 
387                 [ ALWAYS           ] = generic_null,  // (0)
388 		[ DEFAULT          ] = generic_null,  // (1)
389 		[ SKIP             ] = generic_null,  // (2)
390 		[ SIMPLE           ] = generic_simple, // (3)
391 		[ TYPE             ] = generic_null,  // (4)
392 		[ PERSON           ] = generic_person,  // (5)
393 		[ DATE             ] = generic_simple,  // (6)
394 		[ PAGES            ] = generic_null,  // (7)
395 		[ SERIALNO         ] = generic_serialno,  // (8)
396 		[ TITLE            ] = generic_title,  // (9)
397 		[ NOTES            ] = generic_notes,  // (10)
398 		[ DOI              ] = generic_null,  // (11)
399 		[ HOWPUBLISHED     ] = generic_null,  // (12)
400 		[ LINKEDFILE       ] = generic_null,  // (13)
401 		[ KEYWORD          ] = isiin_keyword,  // (14)
402 		[ URL              ] = generic_null,  // (15)
403 		[ GENRE            ] = generic_null,  // (16)
404 		[ BT_SENTE         ] = generic_null,  // (17) /* Bibtex 'Sente' */
405 		[ BT_EPRINT        ] = generic_null,  // (18) /* Bibtex 'Eprint' */
406 		[ BT_ORG           ] = generic_null,  // (19) /* Bibtex Organization */
407 		[ BLT_THESIS_TYPE  ] = generic_null,  // (20) /* Biblatex Thesis Type */
408 		[ BLT_SCHOOL       ] = generic_null,  // (21) /* Biblatex School */
409 		[ BLT_EDITOR       ] = generic_null,  // (22) /* Biblatex Editor */
410 		[ BLT_SUBTYPE      ] = generic_null,  // (23) /* Biblatex entrysubtype */
411 		[ BLT_SKIP         ] = generic_null,  // (24) /* Biblatex Skip Entry */
412 		[ EPRINT           ] = generic_null,  // (25)
413 
414 	};
415 
416 	int process, level, i, nfields, status;
417 	str *intag, *invalue;
418 	char *outtag;
419 
420 	status = isiin_addauthors( bibin, bibout, reftype, p->all, p->nall, &(p->asis), &(p->corps) );
421 	if ( status!=BIBL_OK ) return status;
422 
423 	nfields = fields_num( bibin );
424 	for ( i=0; i<nfields; ++i ) {
425 
426 		intag = fields_tag( bibin, i, FIELDS_STRP );
427 		if ( !strcasecmp( str_cstr( intag ), "AU" ) || !strcasecmp( str_cstr( intag ), "AF" ) )
428 			continue;
429 
430 		if ( !translate_oldtag( str_cstr( intag ), reftype, p->all, p->nall, &process, &level, &outtag ) ) {
431 			isiin_report_notag( p, str_cstr( intag ) );
432 			continue;
433 		}
434 
435 		invalue = fields_value( bibin, i, FIELDS_STRP );
436 
437 		status = convertfns[ process ] ( bibin, i, intag, invalue, level, p, outtag, bibout );
438 		if ( status!=BIBL_OK ) return status;
439 	}
440 
441 	if ( status==BIBL_OK && p->verbose ) fields_report_stderr( bibout );
442 
443 	return status;
444 }
445