1 #include <config.h>
2 #include "xctype.h"
3 #include "xstat.h"
4 #include "xstring.h"
5 #include "xstdlib.h"
6 
7 RCSID("$Id: chek.c,v 1.10 2017/06/09 14:47:20 beebe Exp beebe $")
8 
9 #include "yesorno.h"
10 #include "match.h"			/* must come AFTER yesorno.h */
11 #include "token.h"
12 #include "typedefs.h"			/* must come AFTER match.h */
13 
14 #if defined(HAVE_PATTERNS)
15 #define PATTERN_MATCHES(string,pattern) (match_pattern(string,pattern) == YES)
16 #else /* NOT defined(HAVE_PATTERNS) */
17 #define PATTERN_MATCHES(string,pattern) match_regexp(string,pattern)
18 #endif /* defined(HAVE_PATTERNS) */
19 
20 #define	PT_CHAPTER	0		/* index in pattern_names[] */
21 #define	PT_MONTH	1		/* index in pattern_names[] */
22 #define	PT_NUMBER	2		/* index in pattern_names[] */
23 #define	PT_PAGES	3		/* index in pattern_names[] */
24 #define	PT_VOLUME	4		/* index in pattern_names[] */
25 #define	PT_YEAR		5		/* index in pattern_names[] */
26 
27 #if !defined(STD_MAX_TOKEN)
28 /* This was 1000 in original BibTeX, then changed to 5000 for TeX Live */
29 /* builds up to 2011, and changed again for 2012--2019 releases to 20000 */
30 /* See glob_str_size in : $prefix/texlive/YYYY/texmf-dist/web2c/texmf.cnf */
31 /* #define STD_MAX_TOKEN	((size_t)1000)	*/ /* Standard BibTeX limit */
32 /* #define STD_MAX_TOKEN	((size_t)20000) */ /* TeX Live 2012--2018 limit */
33 #define STD_MAX_TOKEN	((size_t)200000)	/* TeX Live 2019--2021 limit */
34 
35 #endif /* !defined(STD_MAX_TOKEN) */
36 
37 #define UNKNOWN_CODEN	"??????"
38 #define MAX_CODEN	(sizeof(UNKNOWN_CODEN)-1)
39 
40 #define UNKNOWN_ISBN	"??????????"
41 #define MAX_ISBN	(sizeof(UNKNOWN_ISBN)-1)
42 
43 #define UNKNOWN_ISBN_13	"?????????????"
44 #define MAX_ISBN_13	(sizeof(UNKNOWN_ISBN_13)-1)
45 
46 #define UNKNOWN_ISSN	"????????"
47 #define MAX_ISSN	(sizeof(UNKNOWN_ISSN)-1)
48 
49 extern YESorNO	check_values;		/* NO: suppress value checks */
50 extern char	current_field[];	/* field name */
51 extern char	current_key[];		/* string value */
52 extern char	current_value[];	/* string value */
53 extern NAME_PAIR month_pair[];
54 extern PATTERN_NAMES pattern_names[];
55 extern char	shared_string[];
56 extern FILE	*stdlog;		/* usually stderr */
57 extern YESorNO	stdlog_on_stdout;	/* NO for separate files */
58 
59 extern void	error ARGS((const char *msg_));
60 extern void	ISBN_hyphenate ARGS((/*@out@*/ char *s_, /*@out@*/ char *t_, size_t maxs_));
61 extern void	ISBN_13_hyphenate ARGS((/*@out@*/ char *s_, /*@out@*/ char *t_, size_t maxs_));
62 extern void	warning ARGS((const char *msg_));
63 
64 void		check_chapter ARGS((void));
65 void		check_DOI ARGS((void));
66 void		check_CODEN ARGS((void));
67 void		check_ISBN ARGS((void));
68 void		check_ISSN ARGS((void));
69 YESorNO		check_junior ARGS((const char *last_name_));
70 void		check_key ARGS((void));
71 void		check_length ARGS((size_t n_));
72 void		check_month ARGS((void));
73 void		check_number ARGS((void));
74 void		check_other ARGS((void));
75 void		check_pages ARGS((void));
76 void		check_URL ARGS((void));
77 void		check_volume ARGS((void));
78 void		check_year ARGS((void));
79 
80 static void	bad_CODEN ARGS((char CODEN_[6]));
81 static void	bad_ISBN ARGS((char ISBN_[11]));
82 static void	bad_ISBN_13 ARGS((char ISBN_13_[14]));
83 static void	bad_ISSN ARGS((char ISSN_[9]));
84 static YESorNO	check_patterns ARGS((PATTERN_TABLE *pt_,const char *value_));
85 static int	CODEN_character_value ARGS((int c_));
86 static size_t	copy_element ARGS((char *target_, size_t nt_, const char *source_, size_t ns_));
87 static void	incomplete_CODEN ARGS((char CODEN_[6]));
88 static YESorNO	is_CODEN_char ARGS((int c_, size_t n_));
89 static YESorNO	is_DOI_char ARGS((int c_, size_t n_));
90 static YESorNO	is_ISBN_char ARGS((int c_, size_t n_));
91 static YESorNO	is_ISBN_13_char ARGS((int c_, size_t n_));
92 static YESorNO	is_ISSN_char ARGS((int c_, size_t n_));
93 static YESorNO	is_URL_char ARGS((int c_, size_t n_));
94 static void	parse_list ARGS((const char *s,
95 				 YESorNO (*is_name_char_) ARGS((int c_, size_t n_)),
96 				 void (*validate_) ARGS((const char *CODEN_, size_t n_))));
97 static void	parse_element ARGS((/*@out@*/ parse_data *pd_));
98 static void	parse_separator ARGS((/*@out@*/ parse_data *pd_));
99 static void	validate_CODEN ARGS((const char *CODEN_, size_t n_));
100 static void	validate_DOI ARGS((const char *CODEN_, size_t n_));
101 static void	validate_ISBN ARGS((const char *ISBN_, size_t n_));
102 static void	validate_ISBN_13 ARGS((const char *ISBN_, size_t n_));
103 static void	validate_ISSN ARGS((const char *ISSN_, size_t n_));
104 static void	validate_URL ARGS((const char *CODEN_, size_t n_));
105 static void	unexpected ARGS((void));
106 
107 #define elementsof(v)		(sizeof(v) / sizeof(v[0]))
108 
109 #define ISBN_DIGIT_VALUE(c)	((((int)(c) == (int)'X') || ((int)(c) == (int)'x')) ? 10 : \
110 					((int)(c) - (int)'0'))
111 				/* correct only if digits are valid; */
112 				/* the code below ensures that */
113 
114 #define ISSN_DIGIT_VALUE(c)	ISBN_DIGIT_VALUE(c)
115 				/* ISSN digits are just like ISBN digits */
116 
#if defined(HAVE_STDC)
static void
bad_CODEN(char CODEN[7])
#else /* K&R style */
static void
bad_CODEN(CODEN)
char CODEN[7];
#endif
{
    /* Issue a warning that the CODEN stored in CODEN[1..6] has a bad */
    /* checksum.  A NUL in any of those six slots is shown as '?'. */
    /* The doubled percents in fmt[] come out of sprintf() as %f and */
    /* %v in the text that is handed to warning(). */

    static const char fmt[] =
	"Invalid checksum for CODEN %c%c%c%c%c%c in ``%%f = %%v''";
    char msg[sizeof(fmt)];	/* formatted text is shorter than fmt[] */
    int shown[6];		/* printable form of CODEN[1..6] */
    size_t k;

#define XCODEN(n)	(int)((CODEN[n] == '\0') ? '?' : CODEN[n])

    for (k = 0; k < (sizeof(shown) / sizeof(shown[0])); ++k)
	shown[k] = XCODEN(k + 1);

    (void)sprintf(msg, fmt,
		  shown[0], shown[1], shown[2], shown[3], shown[4], shown[5]);

    /* This is a warning rather than an error, because some journals */
    /* might really carry invalid CODENs (some books have invalid ISBNs) */
    warning(msg);
}
137 
138 
139 #if defined(HAVE_STDC)
140 static void
bad_ISBN(char ISBN[11])141 bad_ISBN(char ISBN[11])
142 #else /* K&R style */
143 static void
144 bad_ISBN(ISBN)
145 char ISBN[11];
146 #endif
147 {
148 #define MAXISBN	(13+1)	/* space for correctly hyphenated ISBN, plus NUL */
149     static const char fmt[] = "Invalid checksum for ISBN %s in ``%%f = %%v''";
150     char msg[sizeof(fmt)+MAXISBN-1-2];
151     char s[MAXISBN];
152     char t[MAXISBN];
153     size_t n;
154 
155     (void)strcpy(s,UNKNOWN_ISBN);
156     n = strlen(&ISBN[1]);
157     (void)memcpy(s,&ISBN[1],(n > sizeof(s)) ? sizeof(s) : n);
158     s[10] = '\0';
159     ISBN_hyphenate(s,t,sizeof(s));
160 
161     (void)sprintf(msg, fmt, s);
162     warning(msg);	/* used to be error(), but some books actually have */
163 			/* invalid ISBNs */
164 }
165 
166 
167 #if defined(HAVE_STDC)
168 static void
bad_ISBN_13(char ISBN_13[13+1])169 bad_ISBN_13(char ISBN_13[13 + 1])
170 #else /* K&R style */
171 static void
172 bad_ISBN_13(ISBN_13)
173 char ISBN_13[13 + 1];
174 #endif
175 {
176 #define MAXISBN_13	(13 + 3 + 1)	/* space for correctly hyphenated ISBN_13, plus NUL */
177     static const char fmt[] = "Invalid checksum for ISBN_13 %s in ``%%f = %%v''";
178     char msg[sizeof(fmt) + MAXISBN_13 - 1 - 2];
179     char s[MAXISBN_13];
180     char t[MAXISBN_13];
181     size_t n;
182 
183     (void)strcpy(s,UNKNOWN_ISBN_13);
184     n = strlen(&ISBN_13[1]);
185     (void)memcpy(s,&ISBN_13[1],(n > sizeof(s)) ? sizeof(s) : n);
186     s[13] = '\0';
187     ISBN_13_hyphenate(s,t,sizeof(s));
188 
189     (void)sprintf(msg, fmt, s);
190     warning(msg);	/* used to be error(), but some books actually have */
191 			/* invalid ISBN_13s */
192 }
193 
194 
#if defined(HAVE_STDC)
static void
bad_ISSN(char ISSN[9])
#else /* K&R style */
static void
bad_ISSN(ISSN)
char ISSN[9];
#endif
{
    /* Issue a warning that the ISSN stored in ISSN[1..8] has a bad */
    /* checksum.  The eight characters are shown as two hyphen-separated */
    /* groups of four, with a NUL in any slot displayed as '?'. */

    static const char fmt[] =
	"Invalid checksum for ISSN %c%c%c%c-%c%c%c%c in ``%%f = %%v''";
    char msg[sizeof(fmt)];	/* formatted text is shorter than fmt[] */
    int shown[8];		/* printable form of ISSN[1..8] */
    size_t k;

#define XISSN(n)	(int)((ISSN[n] == '\0') ? '?' : ISSN[n])

    for (k = 0; k < (sizeof(shown) / sizeof(shown[0])); ++k)
	shown[k] = XISSN(k + 1);

    (void)sprintf(msg, fmt, shown[0], shown[1], shown[2], shown[3],
		  shown[4], shown[5], shown[6], shown[7]);

    /* This used to be an error, but some journals might have invalid */
    /* ISSNs (some books have invalid ISBNs), so it is only a warning. */
    warning(msg);
}
215 
216 
217 void
check_chapter(VOID)218 check_chapter(VOID)
219 {
220 #if defined(HAVE_OLDCODE)
221     size_t k;
222     size_t n = strlen(current_value) - 1;
223 
224     /* match patterns like "23" and "23-1" */
225     for (k = 1; k < n; ++k)
226     {	/* omit first and last characters -- they are quotation marks */
227 	if (!(Isdigit(current_value[k]) || (current_value[k] == '-')))
228 	    break;
229     }
230     if (k == n)
231 	return;
232 #else /* NOT defined(HAVE_OLDCODE) */
233     if (check_patterns(pattern_names[PT_CHAPTER].table,current_value) == YES)
234 	return;
235 #endif /* defined(HAVE_OLDCODE) */
236 
237     unexpected();
238 }
239 
240 
241 void
check_CODEN(VOID)242 check_CODEN(VOID)
243 {
244     parse_list(current_value, is_CODEN_char, validate_CODEN);
245 }
246 
247 
248 void
check_DOI(VOID)249 check_DOI(VOID)
250 {
251     parse_list(current_value, is_DOI_char, validate_DOI);
252 
253     if ( IN_SET(current_value, ' ') ||
254 	 IN_SET(current_value, ',') ||
255 	 IN_SET(current_value, ';') )
256 	warning("Unexpected space or list separator in DOI value ``%v''");
257 }
258 
259 
260 void
check_inodes(VOID)261 check_inodes(VOID)
262 {
263     struct stat buflog;
264     struct stat bufout;
265 
266     stdlog_on_stdout = YES;			/* assume the worst initially */
267 
268     (void)fstat(fileno(stdlog),&buflog);
269     (void)fstat(fileno(stdout),&bufout);
270 
271 #if OS_UNIX
272     stdlog_on_stdout = ((buflog.st_dev == bufout.st_dev) &&
273 			(buflog.st_ino == bufout.st_ino)) ? YES : NO;
274 #endif /* OS_UNIX */
275 
276 #if OS_PCDOS
277     /* No inodes, so use other fields instead */
278     stdlog_on_stdout = ((buflog.st_dev == bufout.st_dev) &&
279 			(buflog.st_mode == bufout.st_mode) &&
280 			(buflog.st_size == bufout.st_size) &&
281 			(buflog.st_ctime == bufout.st_ctime)) ? YES : NO;
282 #endif /* OS_PCDOS */
283 
284 #if OS_VAXVMS
285     /* Inode field is 3 separate values */
286     stdlog_on_stdout = ((buflog.st_dev == bufout.st_dev) &&
287 			(buflog.st_ino[0] == bufout.st_ino[0]) &&
288 			(buflog.st_ino[1] == bufout.st_ino[1]) &&
289 			(buflog.st_ino[2] == bufout.st_ino[2])) ? YES : NO;
290 #endif /* OS_VAXVMS */
291 
292 }
293 
294 
295 void
check_ISBN(VOID)296 check_ISBN(VOID)
297 {
298     char t[MAX_TOKEN_SIZE];
299 
300     /* Supply correct hyphenation for all ISBNs */
301     ISBN_hyphenate(current_value,t,sizeof(t)/sizeof(t[0]));
302 
303     parse_list(current_value, is_ISBN_char, validate_ISBN);
304 }
305 
306 
307 void
check_ISBN_13(VOID)308 check_ISBN_13(VOID)
309 {
310     char t[MAX_TOKEN_SIZE];
311 
312     /* Supply correct hyphenation for all ISBN-13s */
313     ISBN_13_hyphenate(current_value, t, sizeof(t) / sizeof(t[0]));
314 
315     parse_list(current_value, is_ISBN_13_char, validate_ISBN_13);
316 }
317 
318 
319 void
check_ISSN(VOID)320 check_ISSN(VOID)
321 {
322     parse_list(current_value, is_ISSN_char, validate_ISSN);
323 }
324 
325 
326 void
check_ISSN_L(VOID)327 check_ISSN_L(VOID)
328 {
329     parse_list(current_value, is_ISSN_char, validate_ISSN);
330 
331     if (strlen(current_value) != 11)	/* "1234-5689" */
332 	warning("Unexpected ISSN-L field length in ``%v''");
333 }
334 
335 
336 #if defined(HAVE_STDC)
337 YESorNO
check_junior(const char * last_name)338 check_junior(const char *last_name)
339 #else /* K&R style */
340 YESorNO
341 check_junior(last_name)
342 const char *last_name;
343 #endif
344 {				/* return YES: name is Jr.-like, else: NO */
345     int b_level;		/* brace level */
346     static const char *juniors[] =
347     {				/* name parts that parse like "Jr." */
348 	"Jr",
349 	"Jr.",
350 	"Sr",
351 	"Sr.",
352 	"SJ",
353 	"S.J.",
354 	"S. J.",
355 	(const char*)NULL,	/* list terminator */
356     };
357     int k;			/* index into juniors[] */
358     int n;			/* index into last_name[] */
359 
360     for (n = 0, b_level = 0; last_name[n] != '\0'; ++n)
361     {				/* check for "Smith, Jr" and "Smith Jr" and */
362 	switch (last_name[n])	/* convert to "{Smith, Jr}" and "{Smith Jr}" */
363 	{
364 	case '{':
365 	    b_level++;
366 	    break;
367 
368 	case '}':
369 	    b_level--;
370 	    break;
371 
372 	case ',':
373 	    if (b_level == 0)
374 		return (YES);
375 	    break;
376 
377 	case '\t':
378 	case ' ':		/* test for Jr.-like name */
379 	    if (b_level == 0)
380 	    {
381 		for (k = 0; juniors[k] != (const char*)NULL; ++k)
382 		{
383 		    if (strnicmp(&last_name[n+1],juniors[k],strlen(juniors[k]))
384 			== 0)
385 			return (YES);
386 		}			/* end for (k...) */
387 		if (strcspn(&last_name[n+1],"IVX") == 0)
388 		    return (YES); /* probably small upper-case Roman number */
389 	    }
390 	    break;
391 
392 	default:
393 	    break;
394 	}				/* end switch (last_name[n]) */
395     }					/* end for (n = 0,...) */
396     return (NO);
397 }
398 
399 
400 void
check_key(VOID)401 check_key(VOID)
402 {
403     int k;				/* index into pattern_names[] */
404 
405     for (k = 0; pattern_names[k].name != (const char*)NULL; ++k)
406     {
407 	if (stricmp(pattern_names[k].name,current_key) == 0)
408 	{				/* then found the required table */
409 	    if (check_patterns(pattern_names[k].table,current_key) == NO)
410 		warning("Unexpected citation key ``%k''");
411 	    return;
412 	}
413     }
414 }
415 
416 
417 #if defined(HAVE_STDC)
418 void
check_length(size_t n)419 check_length(size_t n)
420 #else /* K&R style */
421 void
422 check_length(n)
423 size_t n;
424 #endif
425 {
426     if ((check_values == YES) && (n >= STD_MAX_TOKEN))
427 	warning("String length exceeds standard BibTeX limit for ``%f'' entry");
428 }
429 
430 
431 void
check_month(VOID)432 check_month(VOID)
433 {
434     size_t n;
435 
436     n = strlen(current_value);
437 
438     if (n == 3)			/* check for match against standard abbrevs */
439     {
440 	int m;			/* month index */
441 
442 	for (m = 0; month_pair[m].old_name != (const char*)NULL; ++m)
443 	{
444 	    if (stricmp(month_pair[m].new_name,current_value) == 0)
445 		return;
446 	}
447     }
448 
449     /* Hand coding for the remaining patterns is too ugly to contemplate,
450        so we only provide the checking when real pattern matching is
451        available. */
452 
453 #if !defined(HAVE_OLDCODE)
454     if (check_patterns(pattern_names[PT_MONTH].table,current_value) == YES)
455 	return;
456 #endif /* !defined(HAVE_OLDCODE) */
457 
458     unexpected();
459 }
460 
461 
462 void
check_number(VOID)463 check_number(VOID)
464 {
465 #if defined(HAVE_OLDCODE)
466     size_t k;
467     size_t n = strlen(current_value) - 1;
468 
469     /* We expect the value string to match the regexp "[0-9a-zA-Z---,/ ()]+
470     to handle values like "UMIACS-TR-89-11, CS-TR-2189, SRC-TR-89-13",
471     "RJ 3847 (43914)", "{STAN-CS-89-1256}", "UMIACS-TR-89-3.1, CS-TR-2177.1",
472     "TR\#89-24", "23", "23-27", and "3+4". */
473 
474     for (k = 1; k < n; ++k)
475     {	/* omit first and last characters -- they are quotation marks */
476 	if (!(     Isalnum(current_value[k])
477 		|| Isspace(current_value[k]) || (current_value[k] == '-')
478 		|| (current_value[k] == '+') || (current_value[k] == ',')
479 		|| (current_value[k] == '.') || (current_value[k] == '/')
480 		|| (current_value[k] == '#') || (current_value[k] == '\\')
481 		|| (current_value[k] == '(') || (current_value[k] == ')')
482 		|| (current_value[k] == '{') || (current_value[k] == '}') ))
483 	    break;
484     }
485     if (k == n)
486 	return;
487 #else /* NOT defined(HAVE_OLDCODE) */
488     if (check_patterns(pattern_names[PT_NUMBER].table,current_value) == YES)
489 	return;
490 #endif /* defined(HAVE_OLDCODE) */
491 
492     unexpected();
493 }
494 
495 
496 void
check_other(VOID)497 check_other(VOID)
498 {
499     int k;				/* index into pattern_names[] */
500 
501     for (k = 0; pattern_names[k].name != (const char*)NULL; ++k)
502     {
503 	if (stricmp(pattern_names[k].name,current_field) == 0)
504 	{				/* then found the required table */
505 	    if (check_patterns(pattern_names[k].table,current_value) == NO)
506 		unexpected();
507 	    return;
508 	}
509     }
510 }
511 
512 
513 void
check_pages(VOID)514 check_pages(VOID)
515 {
516     /* Need to handle "B721--B729" as well as "721--729"; some
517        physics journals use an initial letter in page number. */
518 
519 #if defined(HAVE_OLDCODE)
520     int number = 1;
521     size_t k;
522     size_t n = strlen(current_value) - 1;
523 
524     /* We expect the value string to match the regexps [0-9]+ or
525        [0-9]+--[0-9]+ */
526     for (k = 1; k < n; ++k)
527     {	/* omit first and last characters -- they are quotation marks */
528 	switch (current_value[k])
529 	{
530 	case '0':
531 	case '1':
532 	case '2':
533 	case '3':
534 	case '4':
535 	case '5':
536 	case '6':
537 	case '7':
538 	case '8':
539 	case '9':
540 	    if (number > 2)
541 	    {
542 		warning("More than 2 page numbers in ``%f = %v''");
543 		return;
544 	    }
545 	    break;
546 
547 	case '-':
548 	    number++;
549 	    if (current_value[k+1] != '-')	/* expect -- */
550 	    {
551 		warning(
552 		    "Use en-dash, --, to separate page numbers in ``%f = %v''");
553 		return;
554 	    }
555 	    ++k;
556 	    if (current_value[k+1] == '-')	/* should not have --- */
557 	    {
558 		warning(
559 		    "Use en-dash, --, to separate page numbers in ``%f = %v''");
560 		return;
561 	    }
562 	    break;
563 
564 	case ',':
565 	    number++;
566 	    break;
567 
568 	default:
569 	    unexpected();
570 	    return;
571 	}
572     }
573 #else /* NOT defined(HAVE_OLDCODE) */
574     if (check_patterns(pattern_names[PT_PAGES].table,current_value) == YES)
575 	return;
576 #endif /* defined(HAVE_OLDCODE) */
577 
578     unexpected();
579 }
580 
581 
#if (defined(HAVE_PATTERNS) || defined(HAVE_REGEXP) || defined(HAVE_RECOMP))

#if defined(HAVE_STDC)
static YESorNO
check_patterns(PATTERN_TABLE* pt,const char *value)
#else /* K&R style */
static YESorNO
check_patterns(pt,value)
PATTERN_TABLE* pt;
const char *value;
#endif
{
    /* Try the patterns of table pt, in order, against value[].  Return
       YES at the first pattern that matches, or if the table holds no
       patterns at all, and NO if no pattern matches.  When the matching
       pattern has a message, it is issued before returning: through
       error(), without its first character, if it starts with '?', and
       through warning() otherwise. */

    int k;
    const char *note;		/* message of the matching pattern */

    if (pt->current_size == 0)	/* nothing to check against */
	return (YES);

    for (k = 0; k < pt->current_size; ++k)
    {
	if (!PATTERN_MATCHES(value,pt->patterns[k].pattern))
	    continue;

	note = pt->patterns[k].message;
	if (note != (const char*)NULL)
	{
	    if (note[0] == '?')	/* special error flag */
		error(note + 1);
	    else		/* just normal warning */
		warning(note);
	}
	return (YES);
    }
    return (NO);
}
#endif /* (defined(HAVE_PATTERNS) || defined(HAVE_REGEXP) ||
	   defined(HAVE_RECOMP)) */
619 
620 
621 void
check_URL(VOID)622 check_URL(VOID)
623 {
624     parse_list(current_value, is_URL_char, validate_URL);
625 }
626 
627 
628 void
check_volume(VOID)629 check_volume(VOID)
630 {
631 #if defined(HAVE_OLDCODE)
632     size_t k;
633     size_t n = strlen(current_value) - 1;
634 
635     /* Match patterns like "27", "27A", "27/3", "27A 3", "SMC-13", "VIII",
636        "B", "{IX}", "1.2", "Special issue A", and  "11 and 12".  However,
637        NEVER match pattern like "11(5)", since that is probably an erroneous
638        incorporation of issue number into the volume value. */
639 
640     for (k = 1; k < n; ++k)
641     {	/* omit first and last characters -- they are quotation marks */
642 	if (!(     Isalnum(current_value[k])
643 		|| (current_value[k] == '-')
644 		|| (current_value[k] == '/')
645 		|| (current_value[k] == '.')
646 		|| Isspace(current_value[k])
647 		|| (current_value[k] == '{')
648 		|| (current_value[k] == '}') ))
649 	{
650 	    unexpected();
651 	    return;
652 	}
653     }
654 #else /* NOT defined(HAVE_OLDCODE) */
655     if (check_patterns(pattern_names[PT_VOLUME].table,current_value) == YES)
656 	return;
657 #endif /* defined(HAVE_OLDCODE) */
658 
659     unexpected();
660 }
661 
662 
663 void
check_year(VOID)664 check_year(VOID)
665 {
666     char *p;
667     char *q;
668     long year;
669 
670 #if defined(HAVE_OLDCODE)
671     size_t k;
672     size_t n;
673 
674     /* We expect the value string to match the regexp [0-9]+ */
675     for (k = 1, n = strlen(current_value) - 1; k < n; ++k)
676     {	/* omit first and last characters -- they are quotation marks */
677 	if (!Isdigit(current_value[k]))
678 	{
679 	    warning("Non-digit found in field value of ``%f = %v''");
680 	    return;
681 	}
682     }
683 #else /* NOT defined(HAVE_PATTERNS) */
684     if (check_patterns(pattern_names[PT_YEAR].table,current_value) == YES)
685 	return;
686     unexpected();
687 #endif /* defined(HAVE_PATTERNS) */
688 
689     for (p = current_value; (*p != '\0') ; ) /* now validate all digit strings */
690     {
691 	if (Isdigit(*p))	/* then have digit string */
692 	{			/* now make sure year is `reasonable' */
693 	    year = strtol(p,&q,10);
694 	    if ((year < 1800L) || (year > 2099L))
695 		warning("Suspicious year in ``%f = %v''");
696 	    p = q;
697 	}
698 	else			/* ignore other characters */
699 	    p++;
700     }
701 }
702 
703 
#if defined(HAVE_STDC)
static int
CODEN_character_value(int c)
#else /* K&R style */
static int
CODEN_character_value(c)
int c;
#endif
{
    /* Return the numeric value of CODEN character c: 1..26 for the */
    /* letters a..z and A..Z, 27..35 for the digits 1..9, and 36 for */
    /* the digit 0.  Any other character gives -1. */

    if ((c >= (int)'a') && (c <= (int)'z'))
	return (c - (int)'a' + 1);

    if ((c >= (int)'A') && (c <= (int)'Z'))
	return (c - (int)'A' + 1);

    if (c == (int)'0')			/* zero sorts after nine */
	return (36);

    if ((c >= (int)'1') && (c <= (int)'9'))
	return (c - (int)'1' + 27);

    return (-1);
}
724 
725 
#if defined(HAVE_STDC)
static size_t
copy_element(char *target, size_t nt, const char *source, size_t ns)
#else /* K&R style */
static size_t
copy_element(target, nt, source, ns)
char *target;
size_t nt;
const char *source;
size_t ns;
#endif
{    /* Copy source[0..ns-1] into target[0..nt-1], excluding spaces and */
     /* hyphens, and add a trailing NUL.  Copying stops when source[] is */
     /* used up, or when target[] has room left only for the NUL. */
     /* Return the number of characters left in source[], after ignoring */
     /* trailing spaces and hyphens.  Nothing is stored if nt is zero. */
    size_t ks = 0;
    size_t kt = 0;

    if (nt > 0)			/* else nt - 1 would wrap around, and there */
    {				/* would be no room even for the NUL */
	for ( ; (ks < ns) && (kt < (nt - 1)); ++ks)
	{
	    if (!((source[ks] == '-') || Isspace(source[ks])))
		target[kt++] = source[ks];
	}
	target[kt] = '\0';
    }

    /* Skip trailing space and hyphens, but never look beyond the ns */
    /* characters of the element: source[] is not NUL-terminated at ns, */
    /* and stepping past it would make the subtraction below wrap around */
    for ( ; (ks < ns) && ((source[ks] == '-') || Isspace(source[ks])); ++ks)
	continue;

    return (size_t)(ns - ks);
}
755 
756 
#if defined(HAVE_STDC)
static void
incomplete_CODEN(char CODEN[7])
#else /* K&R style */
static void
incomplete_CODEN(CODEN)
char CODEN[7];
#endif
{
    /* Issue a warning that shows the five characters CODEN[1..5] as */
    /* the incomplete CODEN, and all six characters CODEN[1..6] as what */
    /* it should be. */

    static const char fmt[] =
	"Incomplete CODEN %c%c%c%c%c should be %c%c%c%c%c%c in ``%%f = %%v''";
    char msg[sizeof(fmt)];	/* formatted text is shorter than fmt[] */
    const char *c = &CODEN[1];	/* c[0..5] are the six CODEN characters */

    (void)sprintf(msg, fmt,
		  c[0], c[1], c[2], c[3], c[4],
		  c[0], c[1], c[2], c[3], c[4], c[5]);

    /* This is a warning rather than an error, because some journals */
    /* might really carry invalid CODENs (some books have invalid ISBNs) */
    warning(msg);
}
775 
776 
777 #if defined(HAVE_STDC)
778 static YESorNO
is_CODEN_char(int c,size_t n)779 is_CODEN_char(int c, size_t n)
780 #else /* K&R style */
781 static YESorNO
782 is_CODEN_char(c,n)
783 int c;
784 size_t n;
785 #endif
786 {
787     static size_t n_significant = 0;
788 		/* number of significant chars already seen in current CODEN */
789 
790     /* CODENs match [A-Z]-*[A-Z]-*[A-Z]-*[A-Z]-*[A-Z]-*[A-Z0-9], but we
791        also allow lower-case letters. */
792 
793     if (n == 0)				/* start new CODEN */
794 	n_significant = 0;
795 
796     /* embedded hyphens are accepted, but are not significant */
797     if ((n_significant > 0) && (c == (int)'-'))
798 	return (YES);
799     else if ((n_significant < 5) && Isalpha(c))
800     {
801 	n_significant++;
802 	return (YES);
803     }
804     else if ((n_significant >= 5) && Isalnum(c)) /* sixth char can be a digit */
805     {
806 	n_significant++;
807 	return (YES);
808     }
809 
810     return (NO);
811 }
812 
813 
814 #if defined(HAVE_STDC)
815 static YESorNO
is_DOI_char(int c,size_t n)816 is_DOI_char(int c, size_t n)
817 #else /* K&R style */
818 static YESorNO
819 is_DOI_char(c,n)
820 int c;
821 size_t n;
822 #endif
823 {
824     return (Isprint(c) ? YES : NO);    /* DOIs match any printable string */
825 }
826 
827 
828 #if defined(HAVE_STDC)
829 static YESorNO
is_ISBN_char(int c,size_t n)830 is_ISBN_char(int c, size_t n)
831 #else /* K&R style */
832 static YESorNO
833 is_ISBN_char(c,n)
834 int c;
835 size_t n;
836 #endif
837 {
838     static size_t n_significant = 0;
839 		/* number of significant chars already seen in current CODEN */
840 
841     /* ISBNs match
842 	[0-9][- ]*[0-9][- ]*[0-9][- ]*[0-9][- ]*[0-9][- ]*
843 	[0-9][- ]*[0-9][- ]*[0-9][- ]*[0-9][- ]*[0-9xX]
844     */
845 
846     if (n == 0)				/* start new ISBN */
847 	n_significant = 0;
848 
849     /* embedded hyphens and space are accepted, but are not significant */
850     if ((n_significant > 0) && ((c == (int)'-') || Isspace(c)))
851 	return (YES);
852     else if ((n_significant < 9) && Isdigit(c))
853     {
854 	n_significant++;
855 	return (YES);
856     }
857     else if ((n_significant >= 9) && (Isdigit(c) || (c == (int)'X') || (c == (int)'x')))
858     {					/* tenth character may be [0-9xX] */
859 	n_significant++;
860 	return (YES);
861     }
862 
863     return (NO);
864 }
865 
866 
867 #if defined(HAVE_STDC)
868 static YESorNO
is_ISBN_13_char(int c,size_t n)869 is_ISBN_13_char(int c, size_t n)
870 #else /* K&R style */
871 static YESorNO
872 is_ISBN_13_char(c,n)
873 int c;
874 size_t n;
875 #endif
876 {
877     return (is_ISBN_char(c, n));
878 }
879 
880 
881 #if defined(HAVE_STDC)
882 static YESorNO
is_ISSN_char(int c,size_t n)883 is_ISSN_char(int c, size_t n)
884 #else /* K&R style */
885 static YESorNO
886 is_ISSN_char(c,n)
887 int c;
888 size_t n;
889 #endif
890 {
891     static size_t n_significant = 0;
892 		/* number of significant chars already seen in current CODEN */
893 
894     /* ISSNs match
895 	[0-9][- ]*[0-9][- ]*[0-9][- ]*[0-9][- ]*
896 	[0-9][- ]*[0-9][- ]*[0-9][- ]*[0-9xX]
897     */
898 
899     if (n == 0)				/* start new ISSN */
900 	n_significant = 0;
901 
902     /* embedded hyphens and space are accepted, but are not significant */
903     if ((n_significant > 0) && ((c == (int)'-') || Isspace(c)))
904 	return (YES);
905     else if ((n_significant < 7) && Isdigit(c))
906     {
907 	n_significant++;
908 	return (YES);
909     }
910     else if ((n_significant >= 7) && (Isdigit(c) || (c == (int)'X') || (c == (int)'x')))
911     {					/* eighth character may be [0-9xX] */
912 	n_significant++;
913 	return (YES);
914     }
915 
916     return (NO);
917 }
918 
919 
920 #if defined(HAVE_STDC)
921 static YESorNO
is_URL_char(int c,size_t n)922 is_URL_char(int c, size_t n)
923 #else /* K&R style */
924 static YESorNO
925 is_URL_char(c,n)
926 int c;
927 size_t n;
928 #endif
929 {
930     return (Isprint(c) ? YES : NO);    /* URLs match any printable string */
931 }
932 
933 
934 #if defined(HAVE_STDC)
935 static void
parse_list(const char * s,YESorNO (* is_name_char)ARGS ((int c,size_t n)),void (* validate)ARGS ((const char * s,size_t n)))936 parse_list(const char *s, YESorNO (*is_name_char) ARGS((int c, size_t n)),
937 	   void (*validate) ARGS((const char *s, size_t n)))
938 #else /* K&R style */
939 static void
940 parse_list(s, is_name_char, validate)
941 const char *s;
942 YESorNO (*is_name_char) ARGS((int c, size_t n));
943 void (*validate) ARGS((const char *s, size_t n));
944 #endif
945 {
946     parse_data pd;
947 
948     /*******************************************************************
949        Parse a list of CODEN, ISBN, or ISSN elements, according to the
950        grammar:
951 
952 	       LIST : NAME
953 		      | NAME SEPARATOR LIST
954 
955 	       SEPARATOR : [not-a-token-char]+ | (nested balanced parentheses)
956 
957 	       NAME : SEPARATOR* NAME'
958 
959 	       NAME' : [token-char]+
960 
961        This simple, and permissive, grammar accepts any strings that
962        contain sequences of zero or more CODEN, ISBN, or ISSN
963        elements, separated by one or more of characters which are not
964        themselves legal element characters.  The first element in the
965        list may be preceded by any number of non-element characters.
966        Comments are supported as arbitrary strings inside balanced
967        parentheses, allowing lists like
968 
969 		"0-387-97621-4 (invalid ISBN checksum), 3-540-97621-3"
970 
971 		"0020-0190 (1982--1990), 0733-8716 (1991--)"
972 
973 		"0-8493-0190-4 (set), 0-8493-0191-2 (v. 1),
974 		 0-8493-0192-0 (v. 2), 0-8493-0193-9 (v. 3)"
975 
976        The distinction between NAME' and SEPARATOR characters is made
977        by the argument function, (*is_name_char)(), and the validation
978        of the elements is done by the argument function (*validate)().
979 
980        This generality makes it possible for the same code to be
981        reused for at least CODEN, ISBN, and ISSN values, and possibly
982        others in future versions of this program.
983 
984        Tokens are not copied from the list, so no additional dynamic
985        string storage is required.
986     *******************************************************************/
987 
988     pd.s = s;
989     pd.is_name_char = is_name_char;
990 
991     for (;;)
992     {
993 	parse_separator(&pd);		/* may produce a zero-length token */
994 	parse_element(&pd);
995 	if (pd.token_length == 0)	/* no more tokens in list */
996 	    return;
997 	(*validate)(pd.token, pd.token_length);
998     }
999 }
1000 
1001 
1002 #if defined(HAVE_STDC)
1003 static void
parse_element(parse_data * pd)1004 parse_element(/*@out@*/ parse_data *pd)
1005 #else /* K&R style */
1006 static void
1007 parse_element(pd)
1008 /*@out@*/ parse_data *pd;
1009 #endif
1010 {
1011     size_t n;
1012 
1013     for (n = 0, pd->token = pd->s; (*pd->s != '\0') && ((*pd->is_name_char)((int)*pd->s,n) == YES);
1014 	 n++, pd->s++)
1015 	continue;
1016 
1017     pd->token_length = (size_t)(pd->s - pd->token);
1018 }
1019 
1020 
1021 #if defined(HAVE_STDC)
1022 static void
parse_separator(parse_data * pd)1023 parse_separator(/*@out@*/ parse_data *pd)
1024 #else /* K&R style */
1025 static void
1026 parse_separator(pd)
1027 /*@out@*/ parse_data *pd;
1028 #endif
1029 {
1030     size_t n;
1031     int paren_level;			/* parenthesis level */
1032 
1033     pd->token = pd->s;
1034 
1035     for (n = 0, paren_level = 0;
1036 	 ((*pd->s != '\0') && (((*pd->is_name_char)((int)*pd->s,n) == NO) || (paren_level > 0)));
1037 	n++, pd->s++)
1038     {
1039 	if (*pd->s == '(')
1040 	    paren_level++;
1041 	else if (*pd->s == ')')
1042 	{
1043 	    paren_level--;
1044 	    if (paren_level == 0)
1045 	        n = 0;
1046 	}
1047     }
1048 
1049     pd->token_length = (size_t)(pd->s - pd->token);
1050     if (paren_level != 0)
1051 	warning("Non-zero parenthesis level in ``%f = %v''");
1052 }
1053 
1054 
static void
unexpected(VOID)
{
    /* Issue the generic warning used by the check_xxx() functions */
    /* when a value is not acceptable. */

    static const char complaint[] = "Unexpected value in ``%f = %v''";

    warning(complaint);
}
1060 
1061 
1062 #if defined(HAVE_STDC)
1063 static void
validate_CODEN(const char * the_CODEN,size_t n)1064 validate_CODEN(const char *the_CODEN, size_t n)
1065 #else
1066 static void
1067 validate_CODEN(the_CODEN, n)
1068 const char *the_CODEN;
1069 size_t n;
1070 #endif
1071 {
1072     int checksum;
1073     char CODEN[1 + MAX_CODEN + 1];	/* saved CODEN for error messages */
1074 					/* (use slots 1..6 instead of 0..5) */
1075     size_t k;				/* index into CODEN[] */
1076     size_t nleft;
1077 
1078 #define CODEN_CHECK_CHARACTER(n)	"9ABCDEFGHIJKLMNOPQRSTUVWXYZ2345678"[n]
1079 
1080     /*******************************************************************
1081        CODEN values are 6-character strings from the set [A-Z0-9],
1082        with a check digit stored in the 6th position given by
1083 
1084 	   (11*N1 + 7*N2 + 5*N3 + 3*N4 + 1*N5) mod 34 == X
1085 
1086        where the Nk are 1..26 for A..Z, and 27..36 for 1..9..0.
1087        However, the checksum X (in 0..33) is represented by the
1088        corresponding character in the different 34-character range
1089        [9A-Z2-8], which excludes digits 0 and 1 to avoid confusion
1090        with letters O and I.
1091 
1092        In library catalogs, the 6th CODEN digit is often omitted, so
1093        when we find it missing in a CODEN value string, we print a
1094        warning to tell the user what it should be.  However, we
1095        intentionally do NOT insert it into the bibclean output,
1096        because the value string may be corrupted, instead of just
1097        truncated.
1098 
1099        The largest possible sum above is 11*36 + 7*36 + 5*36 + 3*36 +
1100        1*36 = 36*(11 + 7 + 5 + 3 + 1) = 36*27 = 972, corresponding to
1101        the CODEN value 00000T, since 972 mod 34 = 20, which maps to
1102        the letter T.  In reality, the limit is lower than this,
1103        because the initial CODEN character is always alphabetic; the
1104        largest usable CODEN would then be Z0000, which has a checksum
1105        of 11*26 + 7*36 + 5*36 + 3*36 + 1*36 = 36*(11 + 7 + 5 + 3 + 1)
1106        - 10*11 = 862.  Even 16-bit (short) integers are adequate for
1107        this computation.
1108 
1109        Old CODEN values may be stored with a hyphen between the 4th
1110        and 5th characters, e.g. "JACS-A" and "JACS-AT", as well as
1111        just "JACSA" and "JACSAT".  Unlike ISBN and ISSN values, spaces
1112        are NOT used inside CODEN values.
1113     *******************************************************************/
1114 
1115     (void)strcpy(&CODEN[1], UNKNOWN_CODEN);
1116     nleft = copy_element(&CODEN[1], sizeof(CODEN)-1, the_CODEN, n);
1117 
1118     for (checksum = 0, k = 1; CODEN[k] != '\0'; ++k)
1119     {
1120 	if (k < MAX_CODEN)
1121 	{
1122 	    static int multiplier[] = { 0, 11, 7, 5, 3, 1 };
1123 
1124 	    checksum += CODEN_character_value((int)CODEN[k]) * multiplier[k];
1125 	}
1126 	else if (k == MAX_CODEN)
1127 	{
1128 	    if (CODEN_CHECK_CHARACTER(checksum % 34) != CODEN[k])
1129 		bad_CODEN(CODEN);
1130 	}
1131     }				/* end for (loop over CODEN[]) */
1132 
1133     if (strlen(&CODEN[1]) == (MAX_CODEN - 1))
1134     {		/* check digit omitted, so tell the user what it should be */
1135 	CODEN[MAX_CODEN] = CODEN_CHECK_CHARACTER(checksum % 34);
1136 	incomplete_CODEN(CODEN);
1137     }
1138     else if ((strlen(&CODEN[1]) != MAX_CODEN) || (nleft > 0))
1139 	bad_CODEN(CODEN);
1140 }
1141 
1142 
/*
** Check that a DOI value begins with one of the recognized prefixes,
** and issue a warning if it does not.
**
** The argument n is accepted for uniformity with the other validate_*
** functions, but is not used here.
*/
#if defined(HAVE_STDC)
static void
validate_DOI(const char *the_DOI, size_t n)
#else
static void
validate_DOI(the_DOI, n)
const char *the_DOI;
size_t n;
#endif
{
    /*
    ** Typical input:
    **
    **     the_DOI = "\"http://dx.doi.org/10.1000/a.b.c\""
    **
    ** Notice that the delimiting quotation marks are part of the
    ** string value, so we must skip the first character in matching
    ** against members of the list of valid prefixes.
    */

    size_t k;				/* index into doi_prefix_list[] */
    int prefix_found = 0;		/* nonzero once a prefix matches */
    static const char *doi_prefix_list[] =
    {	/* the DOI organization introduced new prefixes in 2016 */
	"http://doi.org/",
	"http://dx.doi.org/",	/* original single-prefix style (2000--2016) */
	"https://doi.org/",
	"https://dx.doi.org/",
#if defined(DOI_RAW_VALID)
	"10."
#endif
    };

    /*
    ** Skipping the first character is only safe when there is one: for
    ** an empty string, &the_DOI[1] lies beyond the terminating NUL, so
    ** such a value is treated as having no recognized prefix.
    */
    if (the_DOI[0] != '\0')
    {
	for (k = 0; k < (size_t)elementsof(doi_prefix_list); ++k)
	{
	    if (strncmp(&the_DOI[1], doi_prefix_list[k],
			strlen(doi_prefix_list[k])) == 0)
	    {
		prefix_found = 1;
		break;			/* one match is sufficient */
	    }
	}
    }

    if (!prefix_found)
	warning("Unexpected prefix in DOI value ``%v''");
}
1184 
1185 
1186 #if defined(HAVE_STDC)
1187 static void
validate_ISBN(const char * the_ISBN,size_t n)1188 validate_ISBN(const char *the_ISBN, size_t n)
1189 #else
1190 static void
1191 validate_ISBN(the_ISBN, n)
1192 const char *the_ISBN;
1193 size_t n;
1194 #endif
1195 {
1196     int checksum;
1197     char ISBN[1 + MAX_ISBN + 1];	/* saved ISBN for error messages */
1198 					/* (use slots 1..10 instead of 0..9) */
1199     size_t k;				/* index into ISBN[] */
1200     size_t nleft;
1201 
1202     /*******************************************************************
1203        ISBN numbers are 10-character values from the set [0-9Xx], with
1204        a checksum given by
1205 
1206 		(sum(k=1:9) digit(k) * k) mod 11 == digit(10)
1207 
1208        where digits have their normal value, X (or x) as a digit has
1209        value 10, and spaces and hyphens are ignored.  The sum is
1210        bounded from above by 10*(1 + 2 + ... + 9) = 450, so even short
1211        (16-bit) integers are sufficient for the accumulation.
1212 
1213        ISBN digits are grouped into four parts separated by space or
1214        hyphen: countrygroupnumber-publishernumber-booknumber-checkdigit.
1215     *******************************************************************/
1216 
1217     (void)strcpy(&ISBN[1],UNKNOWN_ISBN);
1218     nleft = copy_element(&ISBN[1], sizeof(ISBN)-1, the_ISBN, n);
1219 
1220     for (checksum = 0, k = 1; ISBN[k] != '\0'; ++k)
1221     {
1222 	if (k < MAX_ISBN)
1223 	    checksum += ISBN_DIGIT_VALUE(ISBN[k]) * k;
1224 	else if (k == MAX_ISBN)
1225 	{
1226 	    if ((checksum % 11) != ISBN_DIGIT_VALUE(ISBN[k]))
1227 		bad_ISBN(ISBN);
1228 	}
1229     } 					/* end for (loop over ISBN[]) */
1230 
1231     if ((strlen(&ISBN[1]) != MAX_ISBN) || (nleft > 0))
1232 	bad_ISBN(ISBN);
1233 }
1234 
1235 
1236 #if defined(HAVE_STDC)
1237 static void
validate_ISBN_13(const char * the_ISBN_13,size_t n)1238 validate_ISBN_13(const char *the_ISBN_13, size_t n)
1239 #else
1240 static void
1241 validate_ISBN_13(the_ISBN_13, n)
1242 const char *the_ISBN_13;
1243 size_t n;
1244 #endif
1245 {
1246     int checksum;
1247     char ISBN_13[1 + MAX_ISBN_13 + 1];	/* saved ISBN_13 for error messages */
1248 					/* (use slots 1..13 instead of 0..12) */
1249     size_t k;				/* index into ISBN_13[] */
1250     size_t nleft;
1251 
1252     /*******************************************************************
1253        ISBN_13 numbers are 13-character values from the set [0-9Xx], with
1254        a final checksum digit given by
1255 
1256 		rem = (sum(k=1:12) digit(k) * weight(k)) mod 10
1257 		weight(k) = if (k odd) then 1 else 3
1258 		digit(13) = if (rem == 0) then 0 else (10 - rem)
1259 
1260        where digits have their normal value, X (or x) as a digit has
1261        value 10, and spaces and hyphens are ignored.  The sum is
1262        bounded from above by 3*(9 + 9 + ... + 9) = 324, so even
1263        short (16-bit) integers are sufficient for the accumulation.
1264 
1265        ISBN_13 digits are grouped into five parts separated by space
1266        or hyphen:
1267 
1268 	   978-countrygroupnumber-publishernumber-booknumber-checkdigit.
1269 
1270        The initial prefix changes to 979 when the 978 group is
1271        exhausted.
1272     *******************************************************************/
1273 
1274     (void)strcpy(&ISBN_13[1],UNKNOWN_ISBN_13);
1275     nleft = copy_element(&ISBN_13[1], sizeof(ISBN_13)-1, the_ISBN_13, n);
1276 
1277     for (checksum = 0, k = 1; ISBN_13[k] != '\0'; ++k)
1278     {
1279 	size_t weight;
1280 
1281 	weight = (k & 1) ? 1 : 3;
1282 
1283 	if (k < MAX_ISBN_13)
1284 	    checksum += ISBN_DIGIT_VALUE(ISBN_13[k]) * weight ;
1285 	else if (k == MAX_ISBN_13)
1286 	{
1287 	    size_t digit_13, rem;
1288 
1289 	    rem = checksum % 10;
1290 	    digit_13 = (rem == 0) ? 0 : (10 - rem);
1291 
1292 	    if (digit_13 != ISBN_DIGIT_VALUE(ISBN_13[k]))
1293 		bad_ISBN_13(ISBN_13);
1294 	}
1295     } 					/* end for (loop over ISBN_13[]) */
1296 
1297     if ((strlen(&ISBN_13[1]) != MAX_ISBN_13) || (nleft > 0))
1298 	bad_ISBN_13(ISBN_13);
1299 }
1300 
1301 
1302 #if defined(HAVE_STDC)
1303 static void
validate_ISSN(const char * the_ISSN,size_t n)1304 validate_ISSN(const char *the_ISSN, size_t n)
1305 #else
1306 static void
1307 validate_ISSN(the_ISSN, n)
1308 const char *the_ISSN;
1309 size_t n;
1310 #endif
1311 {
1312     long checksum;
1313     char ISSN[1 + MAX_ISSN + 1];	/* saved ISSN for error messages */
1314 					/* (use slots 1..8 instead of 0..7) */
1315     size_t k;				/* index into ISSN[] */
1316     size_t nleft;
1317 
1318     /*******************************************************************
1319        ISSN numbers are 8-character values from the set [0-9Xx], with
1320        a checksum given by
1321 
1322 		(sum(k=1:7) digit(k) * (k+2)) mod 11 == digit(8)
1323 
1324        where digits have their normal value, X (or x) as a digit has
1325        value 10, and spaces and hyphens are ignored.  The sum is
1326        bounded from above by 10*(3 + 4 + ... + 9) = 420, so even short
1327        (16-bit) integers are sufficient for the accumulation.
1328 
1329        ISSN digits are grouped into two 4-digit parts separated by
1330        space or hyphen.
1331     *******************************************************************/
1332 
1333     (void)strcpy(&ISSN[1],UNKNOWN_ISSN);
1334     nleft = copy_element(&ISSN[1], sizeof(ISSN)-1, the_ISSN, n);
1335 
1336     for (checksum = 0L, k = 1; (ISSN[k] != '\0'); ++k)
1337     {
1338 	if (k < MAX_ISSN)
1339 	    checksum += (long)(ISSN_DIGIT_VALUE(ISSN[k]) * (k + 2));
1340 	else if (k == MAX_ISSN)
1341 	{
1342 	    if ((checksum % 11L) != ISSN_DIGIT_VALUE(ISSN[k]))
1343 		bad_ISSN(ISSN);
1344 	}
1345     } 					/* end for (loop over ISSN[]) */
1346 
1347     if ((strlen(&ISSN[1]) != MAX_ISSN) || (nleft > 0))
1348 	bad_ISSN(ISSN);
1349 }
1350 
/*
** Apply plausibility checks to a URL value and warn about anything
** unexpected:
**
**   (1) the value must contain "://";
**   (2) the text immediately before "://" should be ftp, http, or
**       https (compared case-sensitively);
**   (3) the value should not contain a DOI, which belongs in a
**       separate DOI = "..." key/value pair.
**
** The argument n is accepted for uniformity with the other validate_*
** functions, but is not used here.
*/
#if defined(HAVE_STDC)
static void
validate_URL(const char *the_URL, size_t n)
#else
static void
validate_URL(the_URL, n)
const char *the_URL;
size_t n;
#endif
{
    char *p;				/* location of "://" in the_URL */

    p = stristr(the_URL, "://");

    if (p == (char *)NULL)
    {
	/* the message formerly ended in ``%v%'' with a stray percent */
	warning("Expected protocol://... in URL value ``%v''");
	return;
    }

    /*
    ** Only the characters just before "://" are examined, and only
    ** when enough of them exist, so that p[-k] never addresses data
    ** in front of the start of the string.
    */
    if ( ((p - the_URL) >= 3) && (strncmp(&p[-3], "ftp", 3) == 0) )
	/* NO-OP */ ;
    else if ( ((p - the_URL) >= 4) && (strncmp(&p[-4], "http", 4) == 0) )
	/* NO-OP */ ;
    else if ( ((p - the_URL) >= 5) && (strncmp(&p[-5], "https", 5) == 0) )
	/* NO-OP */ ;
    else
	warning("Unexpected protocol://... in URL value ``%v'': normally ftp://... or http://... or https://...");

    /* a DOI resolver host is a certain DOI; host/10.xxx is a likely one */
    if (stristr(the_URL, "doi.org/") != (char *)NULL)
	warning("Unexpected DOI in URL value ``%v'': move to separate DOI = \"...\" key/value in this entry");
    else if ( (stristr(the_URL, ".com/10.") != (char *)NULL) ||
	      (stristr(the_URL, ".edu/10.") != (char *)NULL) ||
	      (stristr(the_URL, ".gov/10.") != (char *)NULL) ||
	      (stristr(the_URL, ".mil/10.") != (char *)NULL) ||
	      (stristr(the_URL, ".net/10.") != (char *)NULL) ||
	      (stristr(the_URL, ".org/10.") != (char *)NULL) )
	warning("Possible DOI in URL value ``%v'': if so, move to separate DOI = \"...\" key/value in this entry");
}
1389