1 /*
2 * isiin.c
3 *
4 * Copyright (c) Chris Putnam 2004-2020
5 * Copyright (c) Georgi N. Boshnakov 2020
6 *
7 * Program and source code released under the GPL version 2
8 *
9 */
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <ctype.h>
14 #include "is_ws.h"
15 #include "str.h"
16 #include "str_conv.h"
17 #include "name.h"
18 #include "fields.h"
19 #include "reftypes.h"
20 #include "bibformats.h"
21 #include "generic.h"
22
23 extern variants isi_all[];
24 extern int isi_nall;
25
26 static int isiin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
27 static int isiin_typef( fields *isiin, const char *filename, int nref, param *p );
28 static int isiin_convertf( fields *isiin, fields *info, int reftype, param *p );
29 static int isiin_processf( fields *isiin, const char *p, const char *filename, long nref, param *pm );
30
31
/*****************************************************
 PUBLIC: int isiin_initparams()
*****************************************************/
35 int
isiin_initparams(param * pm,const char * progname)36 isiin_initparams( param *pm, const char *progname )
37 {
38 pm->readformat = BIBL_ISIIN;
39 pm->charsetin = BIBL_CHARSET_DEFAULT;
40 pm->charsetin_src = BIBL_SRC_DEFAULT;
41 pm->latexin = 0;
42 pm->xmlin = 0;
43 pm->utf8in = 0;
44 pm->nosplittitle = 0;
45 pm->verbose = 0;
46 pm->addcount = 0;
47 pm->output_raw = 0;
48
49 pm->readf = isiin_readf;
50 pm->processf = isiin_processf;
51 pm->cleanf = NULL;
52 pm->typef = isiin_typef;
53 pm->convertf = isiin_convertf;
54 pm->all = isi_all;
55 pm->nall = isi_nall;
56
57 slist_init( &(pm->asis) );
58 slist_init( &(pm->corps) );
59
60 if ( !progname ) pm->progname = NULL;
61 else {
62 pm->progname = strdup( progname );
63 if ( !pm->progname ) return BIBL_ERR_MEMERR;
64 }
65
66 return BIBL_OK;
67 }
68
69 /*****************************************************
70 PUBLIC: int isiin_readf()
71 *****************************************************/
72
/*
 * is_isi_tag()
 *
 * The ISI definition of a tag is strict:
 *   char 1 = uppercase alphabetic character
 *   char 2 = uppercase alphabetic character or digit
 *
 * Only these two characters are examined, and the second one is read
 * only when the first one qualifies, so an empty string is safe.
 *
 * Returns 1 if buf starts with a tag, 0 otherwise.
 */
static int
is_isi_tag( const char *buf )
{
	unsigned char first, second;

	first = (unsigned char) buf[0];
	if ( !isupper( first ) ) return 0;

	second = (unsigned char) buf[1];
	return ( isupper( second ) || isdigit( second ) ) ? 1 : 0;
}
85
86 static int
readmore(FILE * fp,char * buf,int bufsize,int * bufpos,str * line)87 readmore( FILE *fp, char *buf, int bufsize, int *bufpos, str *line )
88 {
89 if ( line->len ) return 1;
90 else return str_fget( fp, buf, bufsize, bufpos, line );
91 }
92
93 static int
isiin_readf(FILE * fp,char * buf,int bufsize,int * bufpos,str * line,str * reference,int * fcharset)94 isiin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
95 {
96 int haveref = 0, inref = 0;
97 char *p;
98
99 *fcharset = CHARSET_UNKNOWN;
100
101 while ( !haveref && readmore( fp, buf, bufsize, bufpos, line ) ) {
102
103 if ( str_is_empty( line ) ) continue;
104
105 p = str_cstr( line );
106
107 /* Recognize UTF8 BOM */
108 if ( line->len > 2 &&
109 (unsigned char)(p[0])==0xEF &&
110 (unsigned char)(p[1])==0xBB &&
111 (unsigned char)(p[2])==0xBF ) {
112 *fcharset = CHARSET_UNICODE;
113 p += 3;
114 }
115
116 /* Each reference ends with 'ER ' */
117 if ( is_isi_tag( p ) ) {
118 if ( !strncmp( p, "FN ", 3 ) ) {
119 if (strncasecmp( p, "FN ISI Export Format",20)){
120 REprintf( ": warning file FN type not '%s' not recognized.\n", /*r->progname,*/ p );
121 }
122 } else if ( !strncmp( p, "VR ", 3 ) ) {
123 if ( strncasecmp( p, "VR 1.0", 6 ) ) {
124 REprintf(": warning file version number '%s' not recognized, expected 'VR 1.0'\n", /*r->progname,*/ p );
125 }
126 } else if ( !strncmp( p, "ER", 2 ) ) haveref = 1;
127 else {
128 str_addchar( reference, '\n' );
129 str_strcatc( reference, p );
130 inref = 1;
131 }
132 str_empty( line );
133 }
134 /* not a tag, but we'll append to the last values */
135 else if ( inref ) {
136 str_addchar( reference, '\n' );
137 str_strcatc( reference, p );
138 str_empty( line );
139 }
140 else {
141 str_empty( line );
142 }
143 }
144 return haveref;
145 }
146
147 /*****************************************************
148 PUBLIC: int isiin_processf()
149 *****************************************************/
150
151 static const char *
process_tagged_line(str * tag,str * value,const char * p)152 process_tagged_line( str *tag, str *value, const char *p )
153 {
154 int i = 0;
155
156 /* collect tag and skip past it */
157 while ( i<2 && *p && *p!='\r' && *p!='\n') {
158 str_addchar( tag, *p );
159 p++;
160 i++;
161 }
162
163 while ( *p==' ' || *p=='\t' ) p++;
164
165 while ( *p && *p!='\r' && *p!='\n' ) {
166 str_addchar( value, *p );
167 p++;
168 }
169
170 str_trimendingws( value );
171
172 while ( *p=='\r' || *p=='\n' ) p++;
173
174 return p;
175 }
176
177 static const char *
process_untagged_line(str * value,const char * p)178 process_untagged_line( str *value, const char *p )
179 {
180 while ( *p==' ' || *p=='\t' ) p++;
181
182 while ( *p && *p!='\r' && *p!='\n' ) {
183 str_addchar( value, *p );
184 p++;
185 }
186
187 str_trimendingws( value );
188
189 while ( *p=='\r' || *p=='\n' ) p++;
190
191 return p;
192 }
193
194 static int
add_tag_value(fields * isiin,str * tag,str * value,int * tag_added)195 add_tag_value( fields *isiin, str *tag, str *value, int *tag_added )
196 {
197 int status;
198
199 if ( str_has_value( tag ) && str_has_value( value ) ) {
200 status = fields_add( isiin, str_cstr( tag ), str_cstr( value ), LEVEL_MAIN );
201 if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
202 *tag_added = 1;
203 }
204
205 else {
206 *tag_added = 0;
207 }
208
209 return BIBL_OK;
210 }
211
212 static int
merge_tag_value(fields * isiin,str * tag,str * value,int * tag_added)213 merge_tag_value( fields *isiin, str *tag, str *value, int *tag_added )
214 {
215 int n, status;
216 str *oldvalue;
217
218 if ( str_has_value( value ) ) {
219
220 if ( *tag_added==1 ) {
221
222 n = fields_num( isiin );
223 if ( n==0 ) return BIBL_OK;
224
225 /* only one AU or AF for list of authors */
226 if ( !strcmp( str_cstr( tag ), "AU" ) ) {
227 status = fields_add( isiin, "AU", str_cstr( value ), LEVEL_MAIN );
228 if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
229 } else if ( !strcmp( str_cstr( tag ), "AF" ) ) {
230 status = fields_add( isiin, "AF", str_cstr( value ), LEVEL_MAIN );
231 if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
232 }
233 /* otherwise append multiline data */
234 else {
235 oldvalue = fields_value( isiin, n-1, FIELDS_STRP_NOUSE );
236 str_addchar( oldvalue, ' ' );
237 str_strcat( oldvalue, value );
238 if ( str_memerr( oldvalue ) ) return BIBL_ERR_MEMERR;
239 }
240 }
241
242 else {
243 status = fields_add( isiin, str_cstr( tag ), str_cstr( value ), LEVEL_MAIN );
244 if ( status!=FIELDS_OK ) return BIBL_ERR_MEMERR;
245 *tag_added = 1;
246 }
247 }
248
249 return BIBL_OK;
250 }
251
252 static int
isiin_processf(fields * isiin,const char * p,const char * filename,long nref,param * pm)253 isiin_processf( fields *isiin, const char *p, const char *filename, long nref, param *pm )
254 {
255 int status, tag_added = 0, ret = 1;
256 str tag, value;
257
258 strs_init( &tag, &value, NULL );
259
260 while ( *p ) {
261
262 /* ...with tag, add */
263 if ( is_isi_tag( p ) ) {
264 str_empty( &tag );
265 str_empty( &value );
266 p = process_tagged_line( &tag, &value, p );
267 status = add_tag_value( isiin, &tag, &value, &tag_added );
268 if ( status!=BIBL_OK ) {
269 ret = 0;
270 goto out;
271 }
272 }
273
274 /* ...untagged, merge -- one AU or AF for list of authors */
275 else {
276 str_empty( &value );
277 p = process_untagged_line( &value, p );
278 status = merge_tag_value( isiin, &tag, &value, &tag_added );
279 if ( status!=BIBL_OK ) {
280 ret = 0;
281 goto out;
282 }
283 }
284
285 }
286 out:
287 strs_free( &value, &tag, NULL );
288 return ret;
289 }
290
291 /*****************************************************
292 PUBLIC: int isiin_typef()
293 *****************************************************/
294 static int
isiin_typef(fields * isiin,const char * filename,int nref,param * p)295 isiin_typef( fields *isiin, const char *filename, int nref, param *p )
296 {
297 int ntypename, nrefname, is_default;
298 char *refname = "", *typename="";
299
300 ntypename = fields_find( isiin, "PT", LEVEL_MAIN );
301 nrefname = fields_find( isiin, "UT", LEVEL_MAIN );
302
303 if ( nrefname!=FIELDS_NOTFOUND ) refname = fields_value( isiin, nrefname, FIELDS_CHRP_NOUSE );
304 if ( ntypename!=FIELDS_NOTFOUND ) typename = fields_value( isiin, ntypename, FIELDS_CHRP_NOUSE );
305
306 return get_reftype( typename, nref, p->progname, p->all, p->nall, refname, &is_default, REFTYPE_CHATTY );
307 }
308
309 /*****************************************************
310 PUBLIC: int isiin_convertf(), returns BIBL_OK or BIBL_ERR_MEMERR
311 *****************************************************/
312
313 /* pull off authors first--use AF before AU */
314 static int
isiin_addauthors(fields * isiin,fields * info,int reftype,variants * all,int nall,slist * asis,slist * corps)315 isiin_addauthors( fields *isiin, fields *info, int reftype, variants *all, int nall, slist *asis, slist *corps )
316 {
317 char *newtag, *authortype, use_af[]="AF", use_au[]="AU";
318 int level, i, n, has_af=0, has_au=0, nfields, ok;
319 str *t, *d;
320
321 nfields = fields_num( isiin );
322 for ( i=0; i<nfields && has_af==0; ++i ) {
323 t = fields_tag( isiin, i, FIELDS_STRP );
324 if ( !strcasecmp( t->data, "AU" ) ) has_au++;
325 if ( !strcasecmp( t->data, "AF" ) ) has_af++;
326 }
327 if ( has_af ) authortype = use_af;
328 else if ( has_au ) authortype = use_au;
329 else return BIBL_OK; /* no authors */
330
331 for ( i=0; i<nfields; ++i ) {
332 t = fields_tag( isiin, i, FIELDS_STRP );
333 if ( strcasecmp( t->data, authortype ) ) continue;
334 d = fields_value( isiin, i, FIELDS_STRP );
335 n = process_findoldtag( authortype, reftype, all, nall );
336 level = ((all[reftype]).tags[n]).level;
337 newtag = all[reftype].tags[n].newstr;
338 ok = name_add( info, newtag, d->data, level, asis, corps );
339 if ( !ok ) return BIBL_ERR_MEMERR;
340 }
341 return BIBL_OK;
342 }
343
344 static int
isiin_keyword(fields * bibin,int n,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)345 isiin_keyword( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
346 {
347 const char *p = str_cstr( invalue );
348 int fstatus, status = BIBL_OK;
349 str keyword;
350
351 str_init( &keyword );
352 while ( *p ) {
353 p = str_cpytodelim( &keyword, skip_ws( p ), ";", 1 );
354 if ( str_memerr( &keyword ) ) { status = BIBL_ERR_MEMERR; goto out; }
355 if ( str_has_value( &keyword ) ) {
356 fstatus = fields_add( bibout, outtag, keyword.data, level );
357 if ( fstatus!=FIELDS_OK ) { status = BIBL_ERR_MEMERR; goto out; }
358 }
359 }
360 out:
361 str_free( &keyword );
362 return status;
363 }
364
365 static void
isiin_report_notag(param * p,char * tag)366 isiin_report_notag( param *p, char *tag )
367 {
368 if ( p->verbose && strcmp( tag, "PT" ) ) {
369 if ( p->progname ) REprintf( "%s: ", p->progname );
370 REprintf( "Did not identify ISI tag '%s'\n", tag );
371 }
372 }
373
374 static int
isiin_convertf(fields * bibin,fields * bibout,int reftype,param * p)375 isiin_convertf( fields *bibin, fields *bibout, int reftype, param *p )
376 {
377 static int (*convertfns[NUM_REFTYPES])(fields *, int, str *, str *, int, param *, char *, fields *) = {
378 // [ 0 ... NUM_REFTYPES-1 ] = generic_null,
379 // [ SIMPLE ] = generic_simple,
380 // [ TITLE ] = generic_title,
381 // [ PERSON ] = generic_person,
382 // [ SERIALNO ] = generic_serialno,
383 // [ DATE ] = generic_simple,
384 // [ NOTES ] = generic_notes,
385 // [ KEYWORD ] = isiin_keyword,
386
387 [ ALWAYS ] = generic_null, // (0)
388 [ DEFAULT ] = generic_null, // (1)
389 [ SKIP ] = generic_null, // (2)
390 [ SIMPLE ] = generic_simple, // (3)
391 [ TYPE ] = generic_null, // (4)
392 [ PERSON ] = generic_person, // (5)
393 [ DATE ] = generic_simple, // (6)
394 [ PAGES ] = generic_null, // (7)
395 [ SERIALNO ] = generic_serialno, // (8)
396 [ TITLE ] = generic_title, // (9)
397 [ NOTES ] = generic_notes, // (10)
398 [ DOI ] = generic_null, // (11)
399 [ HOWPUBLISHED ] = generic_null, // (12)
400 [ LINKEDFILE ] = generic_null, // (13)
401 [ KEYWORD ] = isiin_keyword, // (14)
402 [ URL ] = generic_null, // (15)
403 [ GENRE ] = generic_null, // (16)
404 [ BT_SENTE ] = generic_null, // (17) /* Bibtex 'Sente' */
405 [ BT_EPRINT ] = generic_null, // (18) /* Bibtex 'Eprint' */
406 [ BT_ORG ] = generic_null, // (19) /* Bibtex Organization */
407 [ BLT_THESIS_TYPE ] = generic_null, // (20) /* Biblatex Thesis Type */
408 [ BLT_SCHOOL ] = generic_null, // (21) /* Biblatex School */
409 [ BLT_EDITOR ] = generic_null, // (22) /* Biblatex Editor */
410 [ BLT_SUBTYPE ] = generic_null, // (23) /* Biblatex entrysubtype */
411 [ BLT_SKIP ] = generic_null, // (24) /* Biblatex Skip Entry */
412 [ EPRINT ] = generic_null, // (25)
413
414 };
415
416 int process, level, i, nfields, status;
417 str *intag, *invalue;
418 char *outtag;
419
420 status = isiin_addauthors( bibin, bibout, reftype, p->all, p->nall, &(p->asis), &(p->corps) );
421 if ( status!=BIBL_OK ) return status;
422
423 nfields = fields_num( bibin );
424 for ( i=0; i<nfields; ++i ) {
425
426 intag = fields_tag( bibin, i, FIELDS_STRP );
427 if ( !strcasecmp( str_cstr( intag ), "AU" ) || !strcasecmp( str_cstr( intag ), "AF" ) )
428 continue;
429
430 if ( !translate_oldtag( str_cstr( intag ), reftype, p->all, p->nall, &process, &level, &outtag ) ) {
431 isiin_report_notag( p, str_cstr( intag ) );
432 continue;
433 }
434
435 invalue = fields_value( bibin, i, FIELDS_STRP );
436
437 status = convertfns[ process ] ( bibin, i, intag, invalue, level, p, outtag, bibout );
438 if ( status!=BIBL_OK ) return status;
439 }
440
441 if ( status==BIBL_OK && p->verbose ) fields_report_stderr( bibout );
442
443 return status;
444 }
445