1 /*
2  * copacin.c
3  *
4  * Copyright (c) Chris Putnam 2004-2021
5  *
6  * Program and source code released under the GPL version 2
7  *
8  */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include "is_ws.h"
13 #include "str.h"
14 #include "str_conv.h"
15 #include "slist.h"
16 #include "name.h"
17 #include "fields.h"
18 #include "reftypes.h"
19 #include "bibformats.h"
20 #include "generic.h"
21 
22 extern variants copac_all[];
23 extern int copac_nall;
24 
25 /*****************************************************
 PUBLIC: int copacin_initparams()
27 *****************************************************/
28 
29 static int copacin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
30 static int copacin_processf( fields *bibin, const char *p, const char *filename, long nref, param *pm );
31 static int copacin_convertf( fields *bibin, fields *info, int reftype, param *pm );
32 
33 int
copacin_initparams(param * pm,const char * progname)34 copacin_initparams( param *pm, const char *progname )
35 {
36 	pm->readformat       = BIBL_COPACIN;
37 	pm->charsetin        = BIBL_CHARSET_DEFAULT;
38 	pm->charsetin_src    = BIBL_SRC_DEFAULT;
39 	pm->latexin          = 0;
40 	pm->xmlin            = 0;
41 	pm->utf8in           = 0;
42 	pm->nosplittitle     = 0;
43 	pm->verbose          = 0;
44 	pm->addcount         = 0;
45 	pm->output_raw       = 0;
46 
47 	pm->readf    = copacin_readf;
48 	pm->processf = copacin_processf;
49 	pm->cleanf   = NULL;
50 	pm->typef    = NULL;
51 	pm->convertf = copacin_convertf;
52 	pm->all      = copac_all;
53 	pm->nall     = copac_nall;
54 
55 	slist_init( &(pm->asis) );
56 	slist_init( &(pm->corps) );
57 
58 	if ( !progname ) pm->progname = NULL;
59 	else {
60 		pm->progname = strdup( progname );
61 		if ( !pm->progname ) return BIBL_ERR_MEMERR;
62 	}
63 
64 	return BIBL_OK;
65 }
66 
67 /*****************************************************
68  PUBLIC: int copacin_readf()
69 *****************************************************/
70 
/* Endnote-Refer/Copac tag definition:
    character 1 = alphabetic character
    character 2 = alphabetic character
    character 3 = dash
    character 4 = space
*/

/* Returns non-zero if c is an ASCII letter, upper or lower case. */
static int
copacin_isalpha( char c )
{
	return ( c>='A' && c<='Z' ) || ( c>='a' && c<='z' );
}

/*
 * copacin_istag()
 *
 * Returns 1 if buf starts with a tag as defined above, 0 otherwise.
 *
 * The characters are checked in order and the function returns at
 * the first mismatch, so the terminating NUL of a string shorter than
 * four characters is never read past.
 *
 * The earlier form of the letter tests had a misplaced parenthesis,
 * so each test evaluated as "(not uppercase) or (lowercase)" and
 * rejected lowercase letters.  Both cases are accepted now, matching
 * the definition above.
 */
static int
copacin_istag( const char *buf )
{
	if ( !copacin_isalpha( buf[0] ) ) return 0;
	if ( !copacin_isalpha( buf[1] ) ) return 0;
	if ( buf[2]!='-' ) return 0;
	if ( buf[3]!=' ' ) return 0;
	return 1;
}
88 static int
readmore(FILE * fp,char * buf,int bufsize,int * bufpos,str * line)89 readmore( FILE *fp, char *buf, int bufsize, int *bufpos, str *line )
90 {
91 	if ( line->len ) return 1;
92 	else return str_fget( fp, buf, bufsize, bufpos, line );
93 }
94 
95 static int
copacin_readf(FILE * fp,char * buf,int bufsize,int * bufpos,str * line,str * reference,int * fcharset)96 copacin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
97 {
98 	int haveref = 0, inref=0;
99 	char *p;
100 	*fcharset = CHARSET_UNKNOWN;
101 	while ( !haveref && readmore( fp, buf, bufsize, bufpos, line ) ) {
102 		/* blank line separates */
103 		if ( line->data==NULL ) continue;
104 		if ( inref && line->len==0 ) haveref=1;
105 		p = &(line->data[0]);
106 		/* Recognize UTF8 BOM */
107 		if ( line->len > 2 &&
108 				(unsigned char)(p[0])==0xEF &&
109 				(unsigned char)(p[1])==0xBB &&
110 				(unsigned char)(p[2])==0xBF ) {
111 			*fcharset = CHARSET_UNICODE;
112 			p += 3;
113 		}
114 		if ( copacin_istag( p ) ) {
115 			if ( inref ) str_addchar( reference, '\n' );
116 			str_strcatc( reference, p );
117 			inref = 1;
118 		} else if ( inref ) {
119 			/* copac puts tag only on 1st line */
120 			if ( *p ) p++;
121 			if ( *p ) p++;
122 			if ( *p ) p++;
123 			if ( *p ) {
124 				str_addchar( reference, ' ' );
125 				str_strcatc( reference, p );
126 			}
127 		}
128 		str_empty( line );
129 	}
130 	return haveref;
131 }
132 
133 /*****************************************************
134  PUBLIC: int copacin_processf()
135 *****************************************************/
136 
137 static const char*
copacin_addfield(const char * p,str * tag,str * value)138 copacin_addfield( const char *p, str *tag, str *value )
139 {
140 	int i;
141 
142 	str_empty( tag );
143 	str_empty( value );
144 
145 	i = 0;
146 	while ( i<3 && *p ) {
147 		str_addchar( tag, *p );
148 		p++;
149 		i++;
150 	}
151 
152 	while ( *p==' ' || *p=='\t' ) p++;
153 
154 	while ( *p && *p!='\r' && *p!='\n' ) {
155 		str_addchar( value, *p );
156 		p++;
157 	}
158 
159 	str_trimendingws( value );
160 
161 	while ( *p=='\n' || *p=='\r' ) p++;
162 
163 	return p;
164 }
165 
/*
 * copacin_nextline()
 *
 * Advance p past the remainder of the current line and past every
 * '\r' / '\n' that follows it.  Returns the new position, which is
 * either the first character after those line terminators or the
 * terminating NUL.
 */
static const char *
copacin_nextline( const char *p )
{
	static const char eol[] = "\r\n";

	p += strcspn( p, eol );  /* rest of this line */
	p += strspn( p, eol );   /* the run of CR/LF after it */

	return p;
}
173 
174 static int
copacin_processf(fields * copacin,const char * p,const char * filename,long nref,param * pm)175 copacin_processf( fields *copacin, const char *p, const char *filename, long nref, param *pm )
176 {
177 	int status, ret = 1;
178 	str tag, value;
179 
180 	str_init( &tag );
181 	str_init( &value );
182 
183 	while ( *p ) {
184 
185 		p = skip_ws( p );
186 
187 		if ( copacin_istag( p ) ) {
188 			p = copacin_addfield( p, &tag, &value );
189 			/* don't add empty strings */
190 			if ( str_has_value( &tag ) && str_has_value( &value ) ) {
191 				status = fields_add( copacin, str_cstr( &tag ), str_cstr( &value ), LEVEL_MAIN );
192 				if ( status!=FIELDS_OK ) {
193 					ret = 0;
194 					goto out;
195 				}
196 			}
197 		}
198 
199 		else {
200 			p = copacin_nextline( p );
201 		}
202 	}
203 
204 out:
205 	str_free( &tag );
206 	str_free( &value );
207 
208 	return ret;
209 }
210 
211 /*****************************************************
212  PUBLIC: int copacin_convertf(), returns BIBL_OK or BIBL_ERR_MEMERR
213 *****************************************************/
214 
215 /* copac names appear to always start with last name first, but don't
216  * always seem to have a comma after the name
217  *
218  * editors seem to be stuck in as authors with the tag "[Editor]" in it
219  */
220 static int
copacin_person(fields * bibin,int n,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)221 copacin_person( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
222 {
223 	char *usetag = outtag, editor[]="EDITOR";
224 	int comma = 0, i, status;
225 	str usename, *s;
226 	slist tokens;
227 
228 	if ( slist_find( &(pm->asis),  invalue ) !=-1  ||
229 	     slist_find( &(pm->corps), invalue ) !=-1 ) {
230 		return add_name( bibout, outtag, str_cstr( invalue ), level, &(pm->asis), &(pm->corps) );
231 	}
232 
233 	slist_init( &tokens );
234 	str_init( &usename );
235 
236 	status = slist_tokenize( &tokens, invalue, " ", 1 );
237 	if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
238 
239 	for ( i=0; i<tokens.n; ++i ) {
240 		s = slist_str( &tokens, i );
241 		if ( !strcmp( str_cstr( s ), "[Editor]" ) ) {
242 			usetag = editor;
243 			str_empty( s );
244 		} else if ( s->len && s->data[s->len-1]==',' ) {
245 			comma++;
246 		}
247 	}
248 
249 	if ( comma==0 && tokens.n ) {
250 		s = slist_str( &tokens, 0 );
251 		str_addchar( s, ',' );
252 	}
253 
254 	for ( i=0; i<tokens.n; ++i ) {
255 		s = slist_str( &tokens, i );
256 		if ( str_is_empty( s ) ) continue;
257 		if ( i ) str_addchar( &usename, ' ' );
258 		str_strcat( &usename, s );
259 	}
260 
261 	slist_free( &tokens );
262 
263 	status = add_name( bibout, usetag, str_cstr( &usename ), level, &(pm->asis), &(pm->corps) );
264 
265 	str_free( &usename );
266 
267 	return status;
268 }
269 
270 static void
copacin_report_notag(param * p,char * tag)271 copacin_report_notag( param *p, char *tag )
272 {
273 	if ( p->verbose ) {
274 		if ( p->progname ) fprintf( stderr, "%s: ", p->progname );
275 		fprintf( stderr, "Cannot find tag '%s'\n", tag );
276 	}
277 }
278 
279 static int
copacin_convertf(fields * bibin,fields * bibout,int reftype,param * p)280 copacin_convertf( fields *bibin, fields *bibout, int reftype, param *p )
281 {
282 	static int (*convertfns[NUM_REFTYPES])(fields *, int, str *, str *, int, param *, char *, fields *) = {
283 		[ 0 ... NUM_REFTYPES-1 ] = generic_null,
284 		[ SIMPLE       ] = generic_simple,
285 		[ TITLE        ] = generic_title,
286 		[ NOTES        ] = generic_notes,
287 		[ SERIALNO     ] = generic_serialno,
288 		[ PERSON       ] = copacin_person
289 	};
290 
291 	int  process, level, i, nfields, status = BIBL_OK;
292 	str *intag, *invalue;
293 	char *outtag;
294 
295 	nfields = fields_num( bibin );
296 	for ( i=0; i<nfields; ++i ) {
297 
298 		intag = fields_tag( bibin, i, FIELDS_STRP );
299 
300 		if ( !translate_oldtag( str_cstr( intag ), reftype, p->all, p->nall, &process, &level, &outtag ) ) {
301 			copacin_report_notag( p, str_cstr( intag ) );
302 			continue;
303 		}
304 
305 		invalue = fields_value( bibin, i, FIELDS_STRP );
306 
307 		status = convertfns[ process ] ( bibin, i, intag, invalue, level, p, outtag, bibout );
308 		if ( status!=BIBL_OK ) return status;
309 
310 	}
311 
312 	return status;
313 }
314