1 /*
2 * copacin.c
3 *
4 * Copyright (c) Chris Putnam 2004-2021
5 *
6 * Program and source code released under the GPL version 2
7 *
8 */
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include "is_ws.h"
13 #include "str.h"
14 #include "str_conv.h"
15 #include "slist.h"
16 #include "name.h"
17 #include "fields.h"
18 #include "reftypes.h"
19 #include "bibformats.h"
20 #include "generic.h"
21
22 extern variants copac_all[];
23 extern int copac_nall;
24
25 /*****************************************************
 PUBLIC: int copacin_initparams()
27 *****************************************************/
28
29 static int copacin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset );
30 static int copacin_processf( fields *bibin, const char *p, const char *filename, long nref, param *pm );
31 static int copacin_convertf( fields *bibin, fields *info, int reftype, param *pm );
32
33 int
copacin_initparams(param * pm,const char * progname)34 copacin_initparams( param *pm, const char *progname )
35 {
36 pm->readformat = BIBL_COPACIN;
37 pm->charsetin = BIBL_CHARSET_DEFAULT;
38 pm->charsetin_src = BIBL_SRC_DEFAULT;
39 pm->latexin = 0;
40 pm->xmlin = 0;
41 pm->utf8in = 0;
42 pm->nosplittitle = 0;
43 pm->verbose = 0;
44 pm->addcount = 0;
45 pm->output_raw = 0;
46
47 pm->readf = copacin_readf;
48 pm->processf = copacin_processf;
49 pm->cleanf = NULL;
50 pm->typef = NULL;
51 pm->convertf = copacin_convertf;
52 pm->all = copac_all;
53 pm->nall = copac_nall;
54
55 slist_init( &(pm->asis) );
56 slist_init( &(pm->corps) );
57
58 if ( !progname ) pm->progname = NULL;
59 else {
60 pm->progname = strdup( progname );
61 if ( !pm->progname ) return BIBL_ERR_MEMERR;
62 }
63
64 return BIBL_OK;
65 }
66
67 /*****************************************************
68 PUBLIC: int copacin_readf()
69 *****************************************************/
70
/* Endnote-Refer/Copac tag definition:
    character 1 = alphabetic character
    character 2 = alphabetic character
    character 3 = dash
    character 4 = space
*/

/* Returns non-zero when c is an ASCII letter of either case. */
static int
copacin_isletter( char c )
{
	return ( c>='A' && c<='Z' ) || ( c>='a' && c<='z' );
}

/* Returns 1 when buf begins with a Copac tag (two ASCII letters, a dash
 * and a space), 0 otherwise.
 *
 * buf must be NUL-terminated.  The characters are tested in order and the
 * function returns at the first mismatch, so a string shorter than four
 * characters is never read past its terminator.
 *
 * The letter test used to be written as "!(upper) || (lower)", which made
 * every lower-case letter fail; both cases are now accepted, as the tag
 * definition above describes.
 */
static int
copacin_istag( const char *buf )
{
	if ( !copacin_isletter( buf[0] ) ) return 0;
	if ( !copacin_isletter( buf[1] ) ) return 0;
	if ( buf[2]!='-' ) return 0;
	if ( buf[3]!=' ' ) return 0;
	return 1;
}
88 static int
readmore(FILE * fp,char * buf,int bufsize,int * bufpos,str * line)89 readmore( FILE *fp, char *buf, int bufsize, int *bufpos, str *line )
90 {
91 if ( line->len ) return 1;
92 else return str_fget( fp, buf, bufsize, bufpos, line );
93 }
94
95 static int
copacin_readf(FILE * fp,char * buf,int bufsize,int * bufpos,str * line,str * reference,int * fcharset)96 copacin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, str *line, str *reference, int *fcharset )
97 {
98 int haveref = 0, inref=0;
99 char *p;
100 *fcharset = CHARSET_UNKNOWN;
101 while ( !haveref && readmore( fp, buf, bufsize, bufpos, line ) ) {
102 /* blank line separates */
103 if ( line->data==NULL ) continue;
104 if ( inref && line->len==0 ) haveref=1;
105 p = &(line->data[0]);
106 /* Recognize UTF8 BOM */
107 if ( line->len > 2 &&
108 (unsigned char)(p[0])==0xEF &&
109 (unsigned char)(p[1])==0xBB &&
110 (unsigned char)(p[2])==0xBF ) {
111 *fcharset = CHARSET_UNICODE;
112 p += 3;
113 }
114 if ( copacin_istag( p ) ) {
115 if ( inref ) str_addchar( reference, '\n' );
116 str_strcatc( reference, p );
117 inref = 1;
118 } else if ( inref ) {
119 /* copac puts tag only on 1st line */
120 if ( *p ) p++;
121 if ( *p ) p++;
122 if ( *p ) p++;
123 if ( *p ) {
124 str_addchar( reference, ' ' );
125 str_strcatc( reference, p );
126 }
127 }
128 str_empty( line );
129 }
130 return haveref;
131 }
132
133 /*****************************************************
134 PUBLIC: int copacin_processf()
135 *****************************************************/
136
137 static const char*
copacin_addfield(const char * p,str * tag,str * value)138 copacin_addfield( const char *p, str *tag, str *value )
139 {
140 int i;
141
142 str_empty( tag );
143 str_empty( value );
144
145 i = 0;
146 while ( i<3 && *p ) {
147 str_addchar( tag, *p );
148 p++;
149 i++;
150 }
151
152 while ( *p==' ' || *p=='\t' ) p++;
153
154 while ( *p && *p!='\r' && *p!='\n' ) {
155 str_addchar( value, *p );
156 p++;
157 }
158
159 str_trimendingws( value );
160
161 while ( *p=='\n' || *p=='\r' ) p++;
162
163 return p;
164 }
165
/* Returns a pointer to the start of the line following the one p points
 * into: the rest of the current line is skipped, then every consecutive
 * '\n' or '\r'.  Stops at the terminating NUL if it is reached first.
 */
static const char *
copacin_nextline( const char *p )
{
	/* run to the end of the current line */
	for ( ; *p; ++p ) {
		if ( *p=='\n' || *p=='\r' ) break;
	}

	/* step over the end-of-line characters themselves */
	for ( ; *p=='\n' || *p=='\r'; ++p )
		;

	return p;
}
173
174 static int
copacin_processf(fields * copacin,const char * p,const char * filename,long nref,param * pm)175 copacin_processf( fields *copacin, const char *p, const char *filename, long nref, param *pm )
176 {
177 int status, ret = 1;
178 str tag, value;
179
180 str_init( &tag );
181 str_init( &value );
182
183 while ( *p ) {
184
185 p = skip_ws( p );
186
187 if ( copacin_istag( p ) ) {
188 p = copacin_addfield( p, &tag, &value );
189 /* don't add empty strings */
190 if ( str_has_value( &tag ) && str_has_value( &value ) ) {
191 status = fields_add( copacin, str_cstr( &tag ), str_cstr( &value ), LEVEL_MAIN );
192 if ( status!=FIELDS_OK ) {
193 ret = 0;
194 goto out;
195 }
196 }
197 }
198
199 else {
200 p = copacin_nextline( p );
201 }
202 }
203
204 out:
205 str_free( &tag );
206 str_free( &value );
207
208 return ret;
209 }
210
211 /*****************************************************
212 PUBLIC: int copacin_convertf(), returns BIBL_OK or BIBL_ERR_MEMERR
213 *****************************************************/
214
215 /* copac names appear to always start with last name first, but don't
216 * always seem to have a comma after the name
217 *
218 * editors seem to be stuck in as authors with the tag "[Editor]" in it
219 */
220 static int
copacin_person(fields * bibin,int n,str * intag,str * invalue,int level,param * pm,char * outtag,fields * bibout)221 copacin_person( fields *bibin, int n, str *intag, str *invalue, int level, param *pm, char *outtag, fields *bibout )
222 {
223 char *usetag = outtag, editor[]="EDITOR";
224 int comma = 0, i, status;
225 str usename, *s;
226 slist tokens;
227
228 if ( slist_find( &(pm->asis), invalue ) !=-1 ||
229 slist_find( &(pm->corps), invalue ) !=-1 ) {
230 return add_name( bibout, outtag, str_cstr( invalue ), level, &(pm->asis), &(pm->corps) );
231 }
232
233 slist_init( &tokens );
234 str_init( &usename );
235
236 status = slist_tokenize( &tokens, invalue, " ", 1 );
237 if ( status!=SLIST_OK ) return BIBL_ERR_MEMERR;
238
239 for ( i=0; i<tokens.n; ++i ) {
240 s = slist_str( &tokens, i );
241 if ( !strcmp( str_cstr( s ), "[Editor]" ) ) {
242 usetag = editor;
243 str_empty( s );
244 } else if ( s->len && s->data[s->len-1]==',' ) {
245 comma++;
246 }
247 }
248
249 if ( comma==0 && tokens.n ) {
250 s = slist_str( &tokens, 0 );
251 str_addchar( s, ',' );
252 }
253
254 for ( i=0; i<tokens.n; ++i ) {
255 s = slist_str( &tokens, i );
256 if ( str_is_empty( s ) ) continue;
257 if ( i ) str_addchar( &usename, ' ' );
258 str_strcat( &usename, s );
259 }
260
261 slist_free( &tokens );
262
263 status = add_name( bibout, usetag, str_cstr( &usename ), level, &(pm->asis), &(pm->corps) );
264
265 str_free( &usename );
266
267 return status;
268 }
269
270 static void
copacin_report_notag(param * p,char * tag)271 copacin_report_notag( param *p, char *tag )
272 {
273 if ( p->verbose ) {
274 if ( p->progname ) fprintf( stderr, "%s: ", p->progname );
275 fprintf( stderr, "Cannot find tag '%s'\n", tag );
276 }
277 }
278
279 static int
copacin_convertf(fields * bibin,fields * bibout,int reftype,param * p)280 copacin_convertf( fields *bibin, fields *bibout, int reftype, param *p )
281 {
282 static int (*convertfns[NUM_REFTYPES])(fields *, int, str *, str *, int, param *, char *, fields *) = {
283 [ 0 ... NUM_REFTYPES-1 ] = generic_null,
284 [ SIMPLE ] = generic_simple,
285 [ TITLE ] = generic_title,
286 [ NOTES ] = generic_notes,
287 [ SERIALNO ] = generic_serialno,
288 [ PERSON ] = copacin_person
289 };
290
291 int process, level, i, nfields, status = BIBL_OK;
292 str *intag, *invalue;
293 char *outtag;
294
295 nfields = fields_num( bibin );
296 for ( i=0; i<nfields; ++i ) {
297
298 intag = fields_tag( bibin, i, FIELDS_STRP );
299
300 if ( !translate_oldtag( str_cstr( intag ), reftype, p->all, p->nall, &process, &level, &outtag ) ) {
301 copacin_report_notag( p, str_cstr( intag ) );
302 continue;
303 }
304
305 invalue = fields_value( bibin, i, FIELDS_STRP );
306
307 status = convertfns[ process ] ( bibin, i, intag, invalue, level, p, outtag, bibout );
308 if ( status!=BIBL_OK ) return status;
309
310 }
311
312 return status;
313 }
314