1 #include <config.h>
2 #include "xctype.h"
3 #include "xstat.h"
4 #include "xstring.h"
5 #include "xstdlib.h"
6
7 RCSID("$Id: chek.c,v 1.10 2017/06/09 14:47:20 beebe Exp beebe $")
8
9 #include "yesorno.h"
10 #include "match.h" /* must come AFTER yesorno.h */
11 #include "token.h"
12 #include "typedefs.h" /* must come AFTER match.h */
13
14 #if defined(HAVE_PATTERNS)
15 #define PATTERN_MATCHES(string,pattern) (match_pattern(string,pattern) == YES)
16 #else /* NOT defined(HAVE_PATTERNS) */
17 #define PATTERN_MATCHES(string,pattern) match_regexp(string,pattern)
18 #endif /* defined(HAVE_PATTERNS) */
19
20 #define PT_CHAPTER 0 /* index in pattern_names[] */
21 #define PT_MONTH 1 /* index in pattern_names[] */
22 #define PT_NUMBER 2 /* index in pattern_names[] */
23 #define PT_PAGES 3 /* index in pattern_names[] */
24 #define PT_VOLUME 4 /* index in pattern_names[] */
25 #define PT_YEAR 5 /* index in pattern_names[] */
26
27 #if !defined(STD_MAX_TOKEN)
28 /* This was 1000 in original BibTeX, then changed to 5000 for TeX Live */
29 /* builds up to 2011, and changed again for 2012--2019 releases to 20000 */
30 /* See glob_str_size in : $prefix/texlive/YYYY/texmf-dist/web2c/texmf.cnf */
31 /* #define STD_MAX_TOKEN ((size_t)1000) */ /* Standard BibTeX limit */
32 /* #define STD_MAX_TOKEN ((size_t)20000) */ /* TeX Live 2012--2018 limit */
33 #define STD_MAX_TOKEN ((size_t)200000) /* TeX Live 2019--2021 limit */
34
35 #endif /* !defined(STD_MAX_TOKEN) */
36
37 #define UNKNOWN_CODEN "??????"
38 #define MAX_CODEN (sizeof(UNKNOWN_CODEN)-1)
39
40 #define UNKNOWN_ISBN "??????????"
41 #define MAX_ISBN (sizeof(UNKNOWN_ISBN)-1)
42
43 #define UNKNOWN_ISBN_13 "?????????????"
44 #define MAX_ISBN_13 (sizeof(UNKNOWN_ISBN_13)-1)
45
46 #define UNKNOWN_ISSN "????????"
47 #define MAX_ISSN (sizeof(UNKNOWN_ISSN)-1)
48
49 extern YESorNO check_values; /* NO: suppress value checks */
50 extern char current_field[]; /* field name */
51 extern char current_key[]; /* string value */
52 extern char current_value[]; /* string value */
53 extern NAME_PAIR month_pair[];
54 extern PATTERN_NAMES pattern_names[];
55 extern char shared_string[];
56 extern FILE *stdlog; /* usually stderr */
57 extern YESorNO stdlog_on_stdout; /* NO for separate files */
58
59 extern void error ARGS((const char *msg_));
60 extern void ISBN_hyphenate ARGS((/*@out@*/ char *s_, /*@out@*/ char *t_, size_t maxs_));
61 extern void ISBN_13_hyphenate ARGS((/*@out@*/ char *s_, /*@out@*/ char *t_, size_t maxs_));
62 extern void warning ARGS((const char *msg_));
63
64 void check_chapter ARGS((void));
65 void check_DOI ARGS((void));
66 void check_CODEN ARGS((void));
67 void check_ISBN ARGS((void));
68 void check_ISSN ARGS((void));
69 YESorNO check_junior ARGS((const char *last_name_));
70 void check_key ARGS((void));
71 void check_length ARGS((size_t n_));
72 void check_month ARGS((void));
73 void check_number ARGS((void));
74 void check_other ARGS((void));
75 void check_pages ARGS((void));
76 void check_URL ARGS((void));
77 void check_volume ARGS((void));
78 void check_year ARGS((void));
79
80 static void bad_CODEN ARGS((char CODEN_[6]));
81 static void bad_ISBN ARGS((char ISBN_[11]));
82 static void bad_ISBN_13 ARGS((char ISBN_13_[14]));
83 static void bad_ISSN ARGS((char ISSN_[9]));
84 static YESorNO check_patterns ARGS((PATTERN_TABLE *pt_,const char *value_));
85 static int CODEN_character_value ARGS((int c_));
86 static size_t copy_element ARGS((char *target_, size_t nt_, const char *source_, size_t ns_));
87 static void incomplete_CODEN ARGS((char CODEN_[6]));
88 static YESorNO is_CODEN_char ARGS((int c_, size_t n_));
89 static YESorNO is_DOI_char ARGS((int c_, size_t n_));
90 static YESorNO is_ISBN_char ARGS((int c_, size_t n_));
91 static YESorNO is_ISBN_13_char ARGS((int c_, size_t n_));
92 static YESorNO is_ISSN_char ARGS((int c_, size_t n_));
93 static YESorNO is_URL_char ARGS((int c_, size_t n_));
94 static void parse_list ARGS((const char *s,
95 YESorNO (*is_name_char_) ARGS((int c_, size_t n_)),
96 void (*validate_) ARGS((const char *CODEN_, size_t n_))));
97 static void parse_element ARGS((/*@out@*/ parse_data *pd_));
98 static void parse_separator ARGS((/*@out@*/ parse_data *pd_));
99 static void validate_CODEN ARGS((const char *CODEN_, size_t n_));
100 static void validate_DOI ARGS((const char *CODEN_, size_t n_));
101 static void validate_ISBN ARGS((const char *ISBN_, size_t n_));
102 static void validate_ISBN_13 ARGS((const char *ISBN_, size_t n_));
103 static void validate_ISSN ARGS((const char *ISSN_, size_t n_));
104 static void validate_URL ARGS((const char *CODEN_, size_t n_));
105 static void unexpected ARGS((void));
106
107 #define elementsof(v) (sizeof(v) / sizeof(v[0]))
108
109 #define ISBN_DIGIT_VALUE(c) ((((int)(c) == (int)'X') || ((int)(c) == (int)'x')) ? 10 : \
110 ((int)(c) - (int)'0'))
111 /* correct only if digits are valid; */
112 /* the code below ensures that */
113
114 #define ISSN_DIGIT_VALUE(c) ISBN_DIGIT_VALUE(c)
115 /* ISSN digits are just like ISBN digits */
116
#if defined(HAVE_STDC)
static void
bad_CODEN(char CODEN[7])
#else /* K&R style */
static void
bad_CODEN(CODEN)
char CODEN[7];
#endif
{
    /* Issue a warning that the CODEN held in CODEN[1..6] has a bad
       checksum.  Any slot that contains NUL is shown as '?'.  The
       doubled percent signs come out of sprintf() as %f and %v, which
       are handed on to warning() unchanged (presumably expanded there
       to the current field and value -- confirm in warning()). */

#define XCODEN(n)	(int)((CODEN[n] == '\0') ? '?' : CODEN[n])

    static const char fmt[] =
        "Invalid checksum for CODEN %c%c%c%c%c%c in ``%%f = %%v''";
    char msg[sizeof(fmt)];	/* output is never longer than fmt itself */
    int shown[6];		/* displayable forms of CODEN[1..6] */
    size_t slot;

    for (slot = 0; slot < 6; ++slot)
        shown[slot] = XCODEN(slot + 1);

    (void)sprintf(msg, fmt,
                  shown[0], shown[1], shown[2], shown[3], shown[4], shown[5]);

    /* a warning, not an error: some journals might have invalid CODENs */
    warning(msg);
}
137
138
139 #if defined(HAVE_STDC)
140 static void
bad_ISBN(char ISBN[11])141 bad_ISBN(char ISBN[11])
142 #else /* K&R style */
143 static void
144 bad_ISBN(ISBN)
145 char ISBN[11];
146 #endif
147 {
148 #define MAXISBN (13+1) /* space for correctly hyphenated ISBN, plus NUL */
149 static const char fmt[] = "Invalid checksum for ISBN %s in ``%%f = %%v''";
150 char msg[sizeof(fmt)+MAXISBN-1-2];
151 char s[MAXISBN];
152 char t[MAXISBN];
153 size_t n;
154
155 (void)strcpy(s,UNKNOWN_ISBN);
156 n = strlen(&ISBN[1]);
157 (void)memcpy(s,&ISBN[1],(n > sizeof(s)) ? sizeof(s) : n);
158 s[10] = '\0';
159 ISBN_hyphenate(s,t,sizeof(s));
160
161 (void)sprintf(msg, fmt, s);
162 warning(msg); /* used to be error(), but some books actually have */
163 /* invalid ISBNs */
164 }
165
166
167 #if defined(HAVE_STDC)
168 static void
bad_ISBN_13(char ISBN_13[13+1])169 bad_ISBN_13(char ISBN_13[13 + 1])
170 #else /* K&R style */
171 static void
172 bad_ISBN_13(ISBN_13)
173 char ISBN_13[13 + 1];
174 #endif
175 {
176 #define MAXISBN_13 (13 + 3 + 1) /* space for correctly hyphenated ISBN_13, plus NUL */
177 static const char fmt[] = "Invalid checksum for ISBN_13 %s in ``%%f = %%v''";
178 char msg[sizeof(fmt) + MAXISBN_13 - 1 - 2];
179 char s[MAXISBN_13];
180 char t[MAXISBN_13];
181 size_t n;
182
183 (void)strcpy(s,UNKNOWN_ISBN_13);
184 n = strlen(&ISBN_13[1]);
185 (void)memcpy(s,&ISBN_13[1],(n > sizeof(s)) ? sizeof(s) : n);
186 s[13] = '\0';
187 ISBN_13_hyphenate(s,t,sizeof(s));
188
189 (void)sprintf(msg, fmt, s);
190 warning(msg); /* used to be error(), but some books actually have */
191 /* invalid ISBN_13s */
192 }
193
194
#if defined(HAVE_STDC)
static void
bad_ISSN(char ISSN[9])
#else /* K&R style */
static void
bad_ISSN(ISSN)
char ISSN[9];
#endif
{
    /* Issue a warning that the ISSN held in ISSN[1..8] has a bad
       checksum.  The value is printed as two groups of four characters
       joined by a hyphen; any slot that contains NUL is shown as '?'. */

#define XISSN(n)	(int)((ISSN[n] == '\0') ? '?' : ISSN[n])

    static const char fmt[] =
        "Invalid checksum for ISSN %c%c%c%c-%c%c%c%c in ``%%f = %%v''";
    char msg[sizeof(fmt)];	/* output is never longer than fmt itself */
    int shown[8];		/* displayable forms of ISSN[1..8] */
    size_t slot;

    for (slot = 0; slot < 8; ++slot)
        shown[slot] = XISSN(slot + 1);

    (void)sprintf(msg, fmt,
                  shown[0], shown[1], shown[2], shown[3],
                  shown[4], shown[5], shown[6], shown[7]);

    /* a warning rather than an error: some journals might have */
    /* invalid ISSNs */
    warning(msg);
}
215
216
217 void
check_chapter(VOID)218 check_chapter(VOID)
219 {
220 #if defined(HAVE_OLDCODE)
221 size_t k;
222 size_t n = strlen(current_value) - 1;
223
224 /* match patterns like "23" and "23-1" */
225 for (k = 1; k < n; ++k)
226 { /* omit first and last characters -- they are quotation marks */
227 if (!(Isdigit(current_value[k]) || (current_value[k] == '-')))
228 break;
229 }
230 if (k == n)
231 return;
232 #else /* NOT defined(HAVE_OLDCODE) */
233 if (check_patterns(pattern_names[PT_CHAPTER].table,current_value) == YES)
234 return;
235 #endif /* defined(HAVE_OLDCODE) */
236
237 unexpected();
238 }
239
240
241 void
check_CODEN(VOID)242 check_CODEN(VOID)
243 {
244 parse_list(current_value, is_CODEN_char, validate_CODEN);
245 }
246
247
248 void
check_DOI(VOID)249 check_DOI(VOID)
250 {
251 parse_list(current_value, is_DOI_char, validate_DOI);
252
253 if ( IN_SET(current_value, ' ') ||
254 IN_SET(current_value, ',') ||
255 IN_SET(current_value, ';') )
256 warning("Unexpected space or list separator in DOI value ``%v''");
257 }
258
259
260 void
check_inodes(VOID)261 check_inodes(VOID)
262 {
263 struct stat buflog;
264 struct stat bufout;
265
266 stdlog_on_stdout = YES; /* assume the worst initially */
267
268 (void)fstat(fileno(stdlog),&buflog);
269 (void)fstat(fileno(stdout),&bufout);
270
271 #if OS_UNIX
272 stdlog_on_stdout = ((buflog.st_dev == bufout.st_dev) &&
273 (buflog.st_ino == bufout.st_ino)) ? YES : NO;
274 #endif /* OS_UNIX */
275
276 #if OS_PCDOS
277 /* No inodes, so use other fields instead */
278 stdlog_on_stdout = ((buflog.st_dev == bufout.st_dev) &&
279 (buflog.st_mode == bufout.st_mode) &&
280 (buflog.st_size == bufout.st_size) &&
281 (buflog.st_ctime == bufout.st_ctime)) ? YES : NO;
282 #endif /* OS_PCDOS */
283
284 #if OS_VAXVMS
285 /* Inode field is 3 separate values */
286 stdlog_on_stdout = ((buflog.st_dev == bufout.st_dev) &&
287 (buflog.st_ino[0] == bufout.st_ino[0]) &&
288 (buflog.st_ino[1] == bufout.st_ino[1]) &&
289 (buflog.st_ino[2] == bufout.st_ino[2])) ? YES : NO;
290 #endif /* OS_VAXVMS */
291
292 }
293
294
295 void
check_ISBN(VOID)296 check_ISBN(VOID)
297 {
298 char t[MAX_TOKEN_SIZE];
299
300 /* Supply correct hyphenation for all ISBNs */
301 ISBN_hyphenate(current_value,t,sizeof(t)/sizeof(t[0]));
302
303 parse_list(current_value, is_ISBN_char, validate_ISBN);
304 }
305
306
307 void
check_ISBN_13(VOID)308 check_ISBN_13(VOID)
309 {
310 char t[MAX_TOKEN_SIZE];
311
312 /* Supply correct hyphenation for all ISBN-13s */
313 ISBN_13_hyphenate(current_value, t, sizeof(t) / sizeof(t[0]));
314
315 parse_list(current_value, is_ISBN_13_char, validate_ISBN_13);
316 }
317
318
319 void
check_ISSN(VOID)320 check_ISSN(VOID)
321 {
322 parse_list(current_value, is_ISSN_char, validate_ISSN);
323 }
324
325
326 void
check_ISSN_L(VOID)327 check_ISSN_L(VOID)
328 {
329 parse_list(current_value, is_ISSN_char, validate_ISSN);
330
331 if (strlen(current_value) != 11) /* "1234-5689" */
332 warning("Unexpected ISSN-L field length in ``%v''");
333 }
334
335
336 #if defined(HAVE_STDC)
337 YESorNO
check_junior(const char * last_name)338 check_junior(const char *last_name)
339 #else /* K&R style */
340 YESorNO
341 check_junior(last_name)
342 const char *last_name;
343 #endif
344 { /* return YES: name is Jr.-like, else: NO */
345 int b_level; /* brace level */
346 static const char *juniors[] =
347 { /* name parts that parse like "Jr." */
348 "Jr",
349 "Jr.",
350 "Sr",
351 "Sr.",
352 "SJ",
353 "S.J.",
354 "S. J.",
355 (const char*)NULL, /* list terminator */
356 };
357 int k; /* index into juniors[] */
358 int n; /* index into last_name[] */
359
360 for (n = 0, b_level = 0; last_name[n] != '\0'; ++n)
361 { /* check for "Smith, Jr" and "Smith Jr" and */
362 switch (last_name[n]) /* convert to "{Smith, Jr}" and "{Smith Jr}" */
363 {
364 case '{':
365 b_level++;
366 break;
367
368 case '}':
369 b_level--;
370 break;
371
372 case ',':
373 if (b_level == 0)
374 return (YES);
375 break;
376
377 case '\t':
378 case ' ': /* test for Jr.-like name */
379 if (b_level == 0)
380 {
381 for (k = 0; juniors[k] != (const char*)NULL; ++k)
382 {
383 if (strnicmp(&last_name[n+1],juniors[k],strlen(juniors[k]))
384 == 0)
385 return (YES);
386 } /* end for (k...) */
387 if (strcspn(&last_name[n+1],"IVX") == 0)
388 return (YES); /* probably small upper-case Roman number */
389 }
390 break;
391
392 default:
393 break;
394 } /* end switch (last_name[n]) */
395 } /* end for (n = 0,...) */
396 return (NO);
397 }
398
399
400 void
check_key(VOID)401 check_key(VOID)
402 {
403 int k; /* index into pattern_names[] */
404
405 for (k = 0; pattern_names[k].name != (const char*)NULL; ++k)
406 {
407 if (stricmp(pattern_names[k].name,current_key) == 0)
408 { /* then found the required table */
409 if (check_patterns(pattern_names[k].table,current_key) == NO)
410 warning("Unexpected citation key ``%k''");
411 return;
412 }
413 }
414 }
415
416
417 #if defined(HAVE_STDC)
418 void
check_length(size_t n)419 check_length(size_t n)
420 #else /* K&R style */
421 void
422 check_length(n)
423 size_t n;
424 #endif
425 {
426 if ((check_values == YES) && (n >= STD_MAX_TOKEN))
427 warning("String length exceeds standard BibTeX limit for ``%f'' entry");
428 }
429
430
431 void
check_month(VOID)432 check_month(VOID)
433 {
434 size_t n;
435
436 n = strlen(current_value);
437
438 if (n == 3) /* check for match against standard abbrevs */
439 {
440 int m; /* month index */
441
442 for (m = 0; month_pair[m].old_name != (const char*)NULL; ++m)
443 {
444 if (stricmp(month_pair[m].new_name,current_value) == 0)
445 return;
446 }
447 }
448
449 /* Hand coding for the remaining patterns is too ugly to contemplate,
450 so we only provide the checking when real pattern matching is
451 available. */
452
453 #if !defined(HAVE_OLDCODE)
454 if (check_patterns(pattern_names[PT_MONTH].table,current_value) == YES)
455 return;
456 #endif /* !defined(HAVE_OLDCODE) */
457
458 unexpected();
459 }
460
461
462 void
check_number(VOID)463 check_number(VOID)
464 {
465 #if defined(HAVE_OLDCODE)
466 size_t k;
467 size_t n = strlen(current_value) - 1;
468
469 /* We expect the value string to match the regexp "[0-9a-zA-Z---,/ ()]+
470 to handle values like "UMIACS-TR-89-11, CS-TR-2189, SRC-TR-89-13",
471 "RJ 3847 (43914)", "{STAN-CS-89-1256}", "UMIACS-TR-89-3.1, CS-TR-2177.1",
472 "TR\#89-24", "23", "23-27", and "3+4". */
473
474 for (k = 1; k < n; ++k)
475 { /* omit first and last characters -- they are quotation marks */
476 if (!( Isalnum(current_value[k])
477 || Isspace(current_value[k]) || (current_value[k] == '-')
478 || (current_value[k] == '+') || (current_value[k] == ',')
479 || (current_value[k] == '.') || (current_value[k] == '/')
480 || (current_value[k] == '#') || (current_value[k] == '\\')
481 || (current_value[k] == '(') || (current_value[k] == ')')
482 || (current_value[k] == '{') || (current_value[k] == '}') ))
483 break;
484 }
485 if (k == n)
486 return;
487 #else /* NOT defined(HAVE_OLDCODE) */
488 if (check_patterns(pattern_names[PT_NUMBER].table,current_value) == YES)
489 return;
490 #endif /* defined(HAVE_OLDCODE) */
491
492 unexpected();
493 }
494
495
496 void
check_other(VOID)497 check_other(VOID)
498 {
499 int k; /* index into pattern_names[] */
500
501 for (k = 0; pattern_names[k].name != (const char*)NULL; ++k)
502 {
503 if (stricmp(pattern_names[k].name,current_field) == 0)
504 { /* then found the required table */
505 if (check_patterns(pattern_names[k].table,current_value) == NO)
506 unexpected();
507 return;
508 }
509 }
510 }
511
512
513 void
check_pages(VOID)514 check_pages(VOID)
515 {
516 /* Need to handle "B721--B729" as well as "721--729"; some
517 physics journals use an initial letter in page number. */
518
519 #if defined(HAVE_OLDCODE)
520 int number = 1;
521 size_t k;
522 size_t n = strlen(current_value) - 1;
523
524 /* We expect the value string to match the regexps [0-9]+ or
525 [0-9]+--[0-9]+ */
526 for (k = 1; k < n; ++k)
527 { /* omit first and last characters -- they are quotation marks */
528 switch (current_value[k])
529 {
530 case '0':
531 case '1':
532 case '2':
533 case '3':
534 case '4':
535 case '5':
536 case '6':
537 case '7':
538 case '8':
539 case '9':
540 if (number > 2)
541 {
542 warning("More than 2 page numbers in ``%f = %v''");
543 return;
544 }
545 break;
546
547 case '-':
548 number++;
549 if (current_value[k+1] != '-') /* expect -- */
550 {
551 warning(
552 "Use en-dash, --, to separate page numbers in ``%f = %v''");
553 return;
554 }
555 ++k;
556 if (current_value[k+1] == '-') /* should not have --- */
557 {
558 warning(
559 "Use en-dash, --, to separate page numbers in ``%f = %v''");
560 return;
561 }
562 break;
563
564 case ',':
565 number++;
566 break;
567
568 default:
569 unexpected();
570 return;
571 }
572 }
573 #else /* NOT defined(HAVE_OLDCODE) */
574 if (check_patterns(pattern_names[PT_PAGES].table,current_value) == YES)
575 return;
576 #endif /* defined(HAVE_OLDCODE) */
577
578 unexpected();
579 }
580
581
#if (defined(HAVE_PATTERNS) || defined(HAVE_REGEXP) || defined(HAVE_RECOMP))

#if defined(HAVE_STDC)
static YESorNO
check_patterns(PATTERN_TABLE* pt,const char *value)
#else /* K&R style */
static YESorNO
check_patterns(pt,value)
PATTERN_TABLE* pt;
const char *value;
#endif
{
    /* Return YES if value[] matches one of the patterns in *pt, or if
       the table is empty, and NO if nothing in a non-empty table
       matches.  Patterns are tried in table order and the first match
       wins.  If the winning pattern carries a message, it is issued
       before returning: through error() without its first character
       when that character is '?', and through warning() otherwise. */

    int k;

    if (pt->current_size == 0)		/* no patterns: anything goes */
        return (YES);

    for (k = 0; k < pt->current_size; ++k)
    {
        const char *note;

        if (!PATTERN_MATCHES(value,pt->patterns[k].pattern))
            continue;

        note = pt->patterns[k].message;
        if (note != (const char*)NULL)
        {
            if (note[0] == '?')		/* special error flag */
                error(note + 1);
            else			/* just normal warning */
                warning(note);
        }
        return (YES);
    }

    return (NO);
}
#endif /* (defined(HAVE_PATTERNS) || defined(HAVE_REGEXP) ||
	  defined(HAVE_RECOMP)) */
619
620
621 void
check_URL(VOID)622 check_URL(VOID)
623 {
624 parse_list(current_value, is_URL_char, validate_URL);
625 }
626
627
628 void
check_volume(VOID)629 check_volume(VOID)
630 {
631 #if defined(HAVE_OLDCODE)
632 size_t k;
633 size_t n = strlen(current_value) - 1;
634
635 /* Match patterns like "27", "27A", "27/3", "27A 3", "SMC-13", "VIII",
636 "B", "{IX}", "1.2", "Special issue A", and "11 and 12". However,
637 NEVER match pattern like "11(5)", since that is probably an erroneous
638 incorporation of issue number into the volume value. */
639
640 for (k = 1; k < n; ++k)
641 { /* omit first and last characters -- they are quotation marks */
642 if (!( Isalnum(current_value[k])
643 || (current_value[k] == '-')
644 || (current_value[k] == '/')
645 || (current_value[k] == '.')
646 || Isspace(current_value[k])
647 || (current_value[k] == '{')
648 || (current_value[k] == '}') ))
649 {
650 unexpected();
651 return;
652 }
653 }
654 #else /* NOT defined(HAVE_OLDCODE) */
655 if (check_patterns(pattern_names[PT_VOLUME].table,current_value) == YES)
656 return;
657 #endif /* defined(HAVE_OLDCODE) */
658
659 unexpected();
660 }
661
662
663 void
check_year(VOID)664 check_year(VOID)
665 {
666 char *p;
667 char *q;
668 long year;
669
670 #if defined(HAVE_OLDCODE)
671 size_t k;
672 size_t n;
673
674 /* We expect the value string to match the regexp [0-9]+ */
675 for (k = 1, n = strlen(current_value) - 1; k < n; ++k)
676 { /* omit first and last characters -- they are quotation marks */
677 if (!Isdigit(current_value[k]))
678 {
679 warning("Non-digit found in field value of ``%f = %v''");
680 return;
681 }
682 }
683 #else /* NOT defined(HAVE_PATTERNS) */
684 if (check_patterns(pattern_names[PT_YEAR].table,current_value) == YES)
685 return;
686 unexpected();
687 #endif /* defined(HAVE_PATTERNS) */
688
689 for (p = current_value; (*p != '\0') ; ) /* now validate all digit strings */
690 {
691 if (Isdigit(*p)) /* then have digit string */
692 { /* now make sure year is `reasonable' */
693 year = strtol(p,&q,10);
694 if ((year < 1800L) || (year > 2099L))
695 warning("Suspicious year in ``%f = %v''");
696 p = q;
697 }
698 else /* ignore other characters */
699 p++;
700 }
701 }
702
703
#if defined(HAVE_STDC)
static int
CODEN_character_value(int c)
#else /* K&R style */
static int
CODEN_character_value(c)
int c;
#endif
{
    /* Return the numeric value of character c for the CODEN checksum:
       letters of either case map to 1..26, digits 1..9 map to 27..35,
       and digit 0 maps to 36.  Any other character yields -1. */

    if (((int)'A' <= c) && (c <= (int)'Z'))
        return (c - (int)'A' + 1);

    if (((int)'a' <= c) && (c <= (int)'z'))
        return (c - (int)'a' + 1);

    if (((int)'1' <= c) && (c <= (int)'9'))
        return (c - (int)'1' + 27);

    return ((c == (int)'0') ? 36 : -1);
}
724
725
#if defined(HAVE_STDC)
static size_t
copy_element(char *target, size_t nt, const char *source, size_t ns)
#else /* K&R style */
static size_t
copy_element(target, nt, source, ns)
char *target;
size_t nt;
const char *source;
size_t ns;
#endif
{
    /* Copy the first ns characters of source[] into target[], leaving
       out hyphens and white space, and add a trailing NUL.  At most
       nt - 1 characters are stored, so target[] must have room for nt
       characters (nt >= 1).

       Return the number of characters of source[0..ns-1] that were
       NOT consumed, after also skipping any hyphens and white space
       that follow the last copied character.  The result is zero when
       the whole element fitted into target[].

       source[] is a slice of a longer string and is NOT terminated at
       source[ns], so every access must stay below ns.  In particular,
       the trailing skip must not run on into the text that follows the
       element: doing so would make the unsigned difference ns - ks
       wrap around to a huge "characters left" count. */

    size_t ks;				/* index into source[] */
    size_t kt;				/* index into target[] */

    for (ks = 0, kt = 0; (ks < ns) && (kt < (nt - 1)); ++ks)
    {
	if (!((source[ks] == '-') || Isspace(source[ks])))
	    target[kt++] = source[ks];
    }
    target[kt] = '\0';

    /* skip trailing space and hyphens, but only inside the element */
    while ((ks < ns) && ((source[ks] == '-') || Isspace(source[ks])))
	++ks;

    return (size_t)(ns - ks);
}
755
756
#if defined(HAVE_STDC)
static void
incomplete_CODEN(char CODEN[7])
#else /* K&R style */
static void
incomplete_CODEN(CODEN)
char CODEN[7];
#endif
{
    /* Issue a warning that shows the five-character CODEN found in
       CODEN[1..5], followed by the full six-character form taken from
       CODEN[1..6]. */

    static const char fmt[] =
        "Incomplete CODEN %c%c%c%c%c should be %c%c%c%c%c%c in ``%%f = %%v''";
    char msg[sizeof(fmt)];	/* output is never longer than fmt itself */
    int ch[7];			/* ch[1..6] mirror CODEN[1..6]; ch[0] unused */
    size_t slot;

    for (slot = 1; slot <= 6; ++slot)
        ch[slot] = (int)CODEN[slot];

    (void)sprintf(msg, fmt,
                  ch[1], ch[2], ch[3], ch[4], ch[5],
                  ch[1], ch[2], ch[3], ch[4], ch[5], ch[6]);

    /* a warning, not an error: some journals might have invalid CODENs */
    warning(msg);
}
775
776
777 #if defined(HAVE_STDC)
778 static YESorNO
is_CODEN_char(int c,size_t n)779 is_CODEN_char(int c, size_t n)
780 #else /* K&R style */
781 static YESorNO
782 is_CODEN_char(c,n)
783 int c;
784 size_t n;
785 #endif
786 {
787 static size_t n_significant = 0;
788 /* number of significant chars already seen in current CODEN */
789
790 /* CODENs match [A-Z]-*[A-Z]-*[A-Z]-*[A-Z]-*[A-Z]-*[A-Z0-9], but we
791 also allow lower-case letters. */
792
793 if (n == 0) /* start new CODEN */
794 n_significant = 0;
795
796 /* embedded hyphens are accepted, but are not significant */
797 if ((n_significant > 0) && (c == (int)'-'))
798 return (YES);
799 else if ((n_significant < 5) && Isalpha(c))
800 {
801 n_significant++;
802 return (YES);
803 }
804 else if ((n_significant >= 5) && Isalnum(c)) /* sixth char can be a digit */
805 {
806 n_significant++;
807 return (YES);
808 }
809
810 return (NO);
811 }
812
813
814 #if defined(HAVE_STDC)
815 static YESorNO
is_DOI_char(int c,size_t n)816 is_DOI_char(int c, size_t n)
817 #else /* K&R style */
818 static YESorNO
819 is_DOI_char(c,n)
820 int c;
821 size_t n;
822 #endif
823 {
824 return (Isprint(c) ? YES : NO); /* DOIs match any printable string */
825 }
826
827
828 #if defined(HAVE_STDC)
829 static YESorNO
is_ISBN_char(int c,size_t n)830 is_ISBN_char(int c, size_t n)
831 #else /* K&R style */
832 static YESorNO
833 is_ISBN_char(c,n)
834 int c;
835 size_t n;
836 #endif
837 {
838 static size_t n_significant = 0;
839 /* number of significant chars already seen in current CODEN */
840
841 /* ISBNs match
842 [0-9][- ]*[0-9][- ]*[0-9][- ]*[0-9][- ]*[0-9][- ]*
843 [0-9][- ]*[0-9][- ]*[0-9][- ]*[0-9][- ]*[0-9xX]
844 */
845
846 if (n == 0) /* start new ISBN */
847 n_significant = 0;
848
849 /* embedded hyphens and space are accepted, but are not significant */
850 if ((n_significant > 0) && ((c == (int)'-') || Isspace(c)))
851 return (YES);
852 else if ((n_significant < 9) && Isdigit(c))
853 {
854 n_significant++;
855 return (YES);
856 }
857 else if ((n_significant >= 9) && (Isdigit(c) || (c == (int)'X') || (c == (int)'x')))
858 { /* tenth character may be [0-9xX] */
859 n_significant++;
860 return (YES);
861 }
862
863 return (NO);
864 }
865
866
867 #if defined(HAVE_STDC)
868 static YESorNO
is_ISBN_13_char(int c,size_t n)869 is_ISBN_13_char(int c, size_t n)
870 #else /* K&R style */
871 static YESorNO
872 is_ISBN_13_char(c,n)
873 int c;
874 size_t n;
875 #endif
876 {
877 return (is_ISBN_char(c, n));
878 }
879
880
881 #if defined(HAVE_STDC)
882 static YESorNO
is_ISSN_char(int c,size_t n)883 is_ISSN_char(int c, size_t n)
884 #else /* K&R style */
885 static YESorNO
886 is_ISSN_char(c,n)
887 int c;
888 size_t n;
889 #endif
890 {
891 static size_t n_significant = 0;
892 /* number of significant chars already seen in current CODEN */
893
894 /* ISSNs match
895 [0-9][- ]*[0-9][- ]*[0-9][- ]*[0-9][- ]*
896 [0-9][- ]*[0-9][- ]*[0-9][- ]*[0-9xX]
897 */
898
899 if (n == 0) /* start new ISSN */
900 n_significant = 0;
901
902 /* embedded hyphens and space are accepted, but are not significant */
903 if ((n_significant > 0) && ((c == (int)'-') || Isspace(c)))
904 return (YES);
905 else if ((n_significant < 7) && Isdigit(c))
906 {
907 n_significant++;
908 return (YES);
909 }
910 else if ((n_significant >= 7) && (Isdigit(c) || (c == (int)'X') || (c == (int)'x')))
911 { /* eighth character may be [0-9xX] */
912 n_significant++;
913 return (YES);
914 }
915
916 return (NO);
917 }
918
919
920 #if defined(HAVE_STDC)
921 static YESorNO
is_URL_char(int c,size_t n)922 is_URL_char(int c, size_t n)
923 #else /* K&R style */
924 static YESorNO
925 is_URL_char(c,n)
926 int c;
927 size_t n;
928 #endif
929 {
930 return (Isprint(c) ? YES : NO); /* URLs match any printable string */
931 }
932
933
934 #if defined(HAVE_STDC)
935 static void
parse_list(const char * s,YESorNO (* is_name_char)ARGS ((int c,size_t n)),void (* validate)ARGS ((const char * s,size_t n)))936 parse_list(const char *s, YESorNO (*is_name_char) ARGS((int c, size_t n)),
937 void (*validate) ARGS((const char *s, size_t n)))
938 #else /* K&R style */
939 static void
940 parse_list(s, is_name_char, validate)
941 const char *s;
942 YESorNO (*is_name_char) ARGS((int c, size_t n));
943 void (*validate) ARGS((const char *s, size_t n));
944 #endif
945 {
946 parse_data pd;
947
948 /*******************************************************************
949 Parse a list of CODEN, ISBN, or ISSN elements, according to the
950 grammar:
951
952 LIST : NAME
953 | NAME SEPARATOR LIST
954
955 SEPARATOR : [not-a-token-char]+ | (nested balanced parentheses)
956
957 NAME : SEPARATOR* NAME'
958
959 NAME' : [token-char]+
960
961 This simple, and permissive, grammar accepts any strings that
962 contain sequences of zero or more CODEN, ISBN, or ISSN
963 elements, separated by one or more of characters which are not
964 themselves legal element characters. The first element in the
965 list may be preceded by any number of non-element characters.
966 Comments are supported as arbitrary strings inside balanced
967 parentheses, allowing lists like
968
969 "0-387-97621-4 (invalid ISBN checksum), 3-540-97621-3"
970
971 "0020-0190 (1982--1990), 0733-8716 (1991--)"
972
973 "0-8493-0190-4 (set), 0-8493-0191-2 (v. 1),
974 0-8493-0192-0 (v. 2), 0-8493-0193-9 (v. 3)"
975
976 The distinction between NAME' and SEPARATOR characters is made
977 by the argument function, (*is_name_char)(), and the validation
978 of the elements is done by the argument function (*validate)().
979
980 This generality makes it possible for the same code to be
981 reused for at least CODEN, ISBN, and ISSN values, and possibly
982 others in future versions of this program.
983
984 Tokens are not copied from the list, so no additional dynamic
985 string storage is required.
986 *******************************************************************/
987
988 pd.s = s;
989 pd.is_name_char = is_name_char;
990
991 for (;;)
992 {
993 parse_separator(&pd); /* may produce a zero-length token */
994 parse_element(&pd);
995 if (pd.token_length == 0) /* no more tokens in list */
996 return;
997 (*validate)(pd.token, pd.token_length);
998 }
999 }
1000
1001
1002 #if defined(HAVE_STDC)
1003 static void
parse_element(parse_data * pd)1004 parse_element(/*@out@*/ parse_data *pd)
1005 #else /* K&R style */
1006 static void
1007 parse_element(pd)
1008 /*@out@*/ parse_data *pd;
1009 #endif
1010 {
1011 size_t n;
1012
1013 for (n = 0, pd->token = pd->s; (*pd->s != '\0') && ((*pd->is_name_char)((int)*pd->s,n) == YES);
1014 n++, pd->s++)
1015 continue;
1016
1017 pd->token_length = (size_t)(pd->s - pd->token);
1018 }
1019
1020
1021 #if defined(HAVE_STDC)
1022 static void
parse_separator(parse_data * pd)1023 parse_separator(/*@out@*/ parse_data *pd)
1024 #else /* K&R style */
1025 static void
1026 parse_separator(pd)
1027 /*@out@*/ parse_data *pd;
1028 #endif
1029 {
1030 size_t n;
1031 int paren_level; /* parenthesis level */
1032
1033 pd->token = pd->s;
1034
1035 for (n = 0, paren_level = 0;
1036 ((*pd->s != '\0') && (((*pd->is_name_char)((int)*pd->s,n) == NO) || (paren_level > 0)));
1037 n++, pd->s++)
1038 {
1039 if (*pd->s == '(')
1040 paren_level++;
1041 else if (*pd->s == ')')
1042 {
1043 paren_level--;
1044 if (paren_level == 0)
1045 n = 0;
1046 }
1047 }
1048
1049 pd->token_length = (size_t)(pd->s - pd->token);
1050 if (paren_level != 0)
1051 warning("Non-zero parenthesis level in ``%f = %v''");
1052 }
1053
1054
static void
unexpected(VOID)
{
    /* Common complaint for a field value that failed its check. */
    static const char complaint[] = "Unexpected value in ``%f = %v''";

    warning(complaint);
}
1060
1061
1062 #if defined(HAVE_STDC)
1063 static void
validate_CODEN(const char * the_CODEN,size_t n)1064 validate_CODEN(const char *the_CODEN, size_t n)
1065 #else
1066 static void
1067 validate_CODEN(the_CODEN, n)
1068 const char *the_CODEN;
1069 size_t n;
1070 #endif
1071 {
1072 int checksum;
1073 char CODEN[1 + MAX_CODEN + 1]; /* saved CODEN for error messages */
1074 /* (use slots 1..6 instead of 0..5) */
1075 size_t k; /* index into CODEN[] */
1076 size_t nleft;
1077
1078 #define CODEN_CHECK_CHARACTER(n) "9ABCDEFGHIJKLMNOPQRSTUVWXYZ2345678"[n]
1079
1080 /*******************************************************************
1081 CODEN values are 6-character strings from the set [A-Z0-9],
1082 with a check digit stored in the 6th position given by
1083
1084 (11*N1 + 7*N2 + 5*N3 + 3*N4 + 1*N5) mod 34 == X
1085
1086 where the Nk are 1..26 for A..Z, and 27..36 for 1..9..0.
1087 However, the checksum X (in 0..33) is represented by the
1088 corresponding character in the different 34-character range
1089 [9A-Z2-8], which excludes digits 0 and 1 to avoid confusion
1090 with letters O and I.
1091
1092 In library catalogs, the 6th CODEN digit is often omitted, so
1093 when we find it missing in a CODEN value string, we print a
1094 warning to tell the user what it should be. However, we
1095 intentionally do NOT insert it into the bibclean output,
1096 because the value string may be corrupted, instead of just
1097 truncated.
1098
1099 The largest possible sum above is 11*36 + 7*36 + 5*36 + 3*36 +
1100 1*36 = 36*(11 + 7 + 5 + 3 + 1) = 36*27 = 972, corresponding to
1101 the CODEN value 00000T, since 972 mod 34 = 20, which maps to
1102 the letter T. In reality, the limit is lower than this,
1103 because the initial CODEN character is always alphabetic; the
1104 largest usable CODEN would then be Z0000, which has a checksum
1105 of 11*26 + 7*36 + 5*36 + 3*36 + 1*36 = 36*(11 + 7 + 5 + 3 + 1)
1106 - 10*11 = 862. Even 16-bit (short) integers are adequate for
1107 this computation.
1108
1109 Old CODEN values may be stored with a hyphen between the 4th
1110 and 5th characters, e.g. "JACS-A" and "JACS-AT", as well as
1111 just "JACSA" and "JACSAT". Unlike ISBN and ISSN values, spaces
1112 are NOT used inside CODEN values.
1113 *******************************************************************/
1114
1115 (void)strcpy(&CODEN[1], UNKNOWN_CODEN);
1116 nleft = copy_element(&CODEN[1], sizeof(CODEN)-1, the_CODEN, n);
1117
1118 for (checksum = 0, k = 1; CODEN[k] != '\0'; ++k)
1119 {
1120 if (k < MAX_CODEN)
1121 {
1122 static int multiplier[] = { 0, 11, 7, 5, 3, 1 };
1123
1124 checksum += CODEN_character_value((int)CODEN[k]) * multiplier[k];
1125 }
1126 else if (k == MAX_CODEN)
1127 {
1128 if (CODEN_CHECK_CHARACTER(checksum % 34) != CODEN[k])
1129 bad_CODEN(CODEN);
1130 }
1131 } /* end for (loop over CODEN[]) */
1132
1133 if (strlen(&CODEN[1]) == (MAX_CODEN - 1))
1134 { /* check digit omitted, so tell the user what it should be */
1135 CODEN[MAX_CODEN] = CODEN_CHECK_CHARACTER(checksum % 34);
1136 incomplete_CODEN(CODEN);
1137 }
1138 else if ((strlen(&CODEN[1]) != MAX_CODEN) || (nleft > 0))
1139 bad_CODEN(CODEN);
1140 }
1141
1142
#if defined(HAVE_STDC)
static void
validate_DOI(const char *the_DOI, size_t n)
#else
static void
validate_DOI(the_DOI, n)
const char *the_DOI;
size_t n;
#endif
{
    /*
    ** Issue a warning when a DOI value does not begin with one of
    ** the recognized DOI resolver prefixes.
    **
    ** Typical input:
    **
    **     the_DOI = "\"http://dx.doi.org/10.1000/a.b.c\""
    **
    ** Notice that the delimiting quotation marks are part of the
    ** string value, so we must skip the first character in matching
    ** against members of the list of valid prefixes.
    **
    ** The argument n is accepted for signature compatibility with
    ** the other validate_xxx() functions, but is not examined here.
    */

    int k;
    int prefix_found;
    static const char *doi_prefix_list[] =
    {   /* the DOI organization introduced new prefixes in 2016 */
        "http://doi.org/",
        "http://dx.doi.org/",   /* original single-prefix style (2000--2016) */
        "https://doi.org/",
        "https://dx.doi.org/",
#if defined(DOI_RAW_VALID)
        "10."
#endif
    };

    prefix_found = 0;

    /*
    ** An empty value has no character to skip: comparing from
    ** &the_DOI[1] would then read beyond the terminating NUL, and
    ** whatever bytes happen to follow could falsely match a prefix.
    ** Treat the empty value as having no valid prefix.
    */
    if (the_DOI[0] != '\0')
    {
        for (k = 0; k < elementsof(doi_prefix_list); ++k)
        {
            if (strncmp(&the_DOI[1], doi_prefix_list[k],
                        strlen(doi_prefix_list[k])) == 0)
            {
                prefix_found = 1;
                break;          /* one match suffices */
            }
        }
    }

    if (!prefix_found)
        warning("Unexpected prefix in DOI value ``%v''");
}
1184
1185
1186 #if defined(HAVE_STDC)
1187 static void
validate_ISBN(const char * the_ISBN,size_t n)1188 validate_ISBN(const char *the_ISBN, size_t n)
1189 #else
1190 static void
1191 validate_ISBN(the_ISBN, n)
1192 const char *the_ISBN;
1193 size_t n;
1194 #endif
1195 {
1196 int checksum;
1197 char ISBN[1 + MAX_ISBN + 1]; /* saved ISBN for error messages */
1198 /* (use slots 1..10 instead of 0..9) */
1199 size_t k; /* index into ISBN[] */
1200 size_t nleft;
1201
1202 /*******************************************************************
1203 ISBN numbers are 10-character values from the set [0-9Xx], with
1204 a checksum given by
1205
1206 (sum(k=1:9) digit(k) * k) mod 11 == digit(10)
1207
1208 where digits have their normal value, X (or x) as a digit has
1209 value 10, and spaces and hyphens are ignored. The sum is
1210 bounded from above by 10*(1 + 2 + ... + 9) = 450, so even short
1211 (16-bit) integers are sufficient for the accumulation.
1212
1213 ISBN digits are grouped into four parts separated by space or
1214 hyphen: countrygroupnumber-publishernumber-booknumber-checkdigit.
1215 *******************************************************************/
1216
1217 (void)strcpy(&ISBN[1],UNKNOWN_ISBN);
1218 nleft = copy_element(&ISBN[1], sizeof(ISBN)-1, the_ISBN, n);
1219
1220 for (checksum = 0, k = 1; ISBN[k] != '\0'; ++k)
1221 {
1222 if (k < MAX_ISBN)
1223 checksum += ISBN_DIGIT_VALUE(ISBN[k]) * k;
1224 else if (k == MAX_ISBN)
1225 {
1226 if ((checksum % 11) != ISBN_DIGIT_VALUE(ISBN[k]))
1227 bad_ISBN(ISBN);
1228 }
1229 } /* end for (loop over ISBN[]) */
1230
1231 if ((strlen(&ISBN[1]) != MAX_ISBN) || (nleft > 0))
1232 bad_ISBN(ISBN);
1233 }
1234
1235
1236 #if defined(HAVE_STDC)
1237 static void
validate_ISBN_13(const char * the_ISBN_13,size_t n)1238 validate_ISBN_13(const char *the_ISBN_13, size_t n)
1239 #else
1240 static void
1241 validate_ISBN_13(the_ISBN_13, n)
1242 const char *the_ISBN_13;
1243 size_t n;
1244 #endif
1245 {
1246 int checksum;
1247 char ISBN_13[1 + MAX_ISBN_13 + 1]; /* saved ISBN_13 for error messages */
1248 /* (use slots 1..13 instead of 0..12) */
1249 size_t k; /* index into ISBN_13[] */
1250 size_t nleft;
1251
1252 /*******************************************************************
1253 ISBN_13 numbers are 13-character values from the set [0-9Xx], with
1254 a final checksum digit given by
1255
1256 rem = (sum(k=1:12) digit(k) * weight(k)) mod 10
1257 weight(k) = if (k odd) then 1 else 3
1258 digit(13) = if (rem == 0) then 0 else (10 - rem)
1259
1260 where digits have their normal value, X (or x) as a digit has
1261 value 10, and spaces and hyphens are ignored. The sum is
1262 bounded from above by 3*(9 + 9 + ... + 9) = 324, so even
1263 short (16-bit) integers are sufficient for the accumulation.
1264
1265 ISBN_13 digits are grouped into five parts separated by space
1266 or hyphen:
1267
1268 978-countrygroupnumber-publishernumber-booknumber-checkdigit.
1269
1270 The initial prefix changes to 979 when the 978 group is
1271 exhausted.
1272 *******************************************************************/
1273
1274 (void)strcpy(&ISBN_13[1],UNKNOWN_ISBN_13);
1275 nleft = copy_element(&ISBN_13[1], sizeof(ISBN_13)-1, the_ISBN_13, n);
1276
1277 for (checksum = 0, k = 1; ISBN_13[k] != '\0'; ++k)
1278 {
1279 size_t weight;
1280
1281 weight = (k & 1) ? 1 : 3;
1282
1283 if (k < MAX_ISBN_13)
1284 checksum += ISBN_DIGIT_VALUE(ISBN_13[k]) * weight ;
1285 else if (k == MAX_ISBN_13)
1286 {
1287 size_t digit_13, rem;
1288
1289 rem = checksum % 10;
1290 digit_13 = (rem == 0) ? 0 : (10 - rem);
1291
1292 if (digit_13 != ISBN_DIGIT_VALUE(ISBN_13[k]))
1293 bad_ISBN_13(ISBN_13);
1294 }
1295 } /* end for (loop over ISBN_13[]) */
1296
1297 if ((strlen(&ISBN_13[1]) != MAX_ISBN_13) || (nleft > 0))
1298 bad_ISBN_13(ISBN_13);
1299 }
1300
1301
1302 #if defined(HAVE_STDC)
1303 static void
validate_ISSN(const char * the_ISSN,size_t n)1304 validate_ISSN(const char *the_ISSN, size_t n)
1305 #else
1306 static void
1307 validate_ISSN(the_ISSN, n)
1308 const char *the_ISSN;
1309 size_t n;
1310 #endif
1311 {
1312 long checksum;
1313 char ISSN[1 + MAX_ISSN + 1]; /* saved ISSN for error messages */
1314 /* (use slots 1..8 instead of 0..7) */
1315 size_t k; /* index into ISSN[] */
1316 size_t nleft;
1317
1318 /*******************************************************************
1319 ISSN numbers are 8-character values from the set [0-9Xx], with
1320 a checksum given by
1321
1322 (sum(k=1:7) digit(k) * (k+2)) mod 11 == digit(8)
1323
1324 where digits have their normal value, X (or x) as a digit has
1325 value 10, and spaces and hyphens are ignored. The sum is
1326 bounded from above by 10*(3 + 4 + ... + 9) = 420, so even short
1327 (16-bit) integers are sufficient for the accumulation.
1328
1329 ISSN digits are grouped into two 4-digit parts separated by
1330 space or hyphen.
1331 *******************************************************************/
1332
1333 (void)strcpy(&ISSN[1],UNKNOWN_ISSN);
1334 nleft = copy_element(&ISSN[1], sizeof(ISSN)-1, the_ISSN, n);
1335
1336 for (checksum = 0L, k = 1; (ISSN[k] != '\0'); ++k)
1337 {
1338 if (k < MAX_ISSN)
1339 checksum += (long)(ISSN_DIGIT_VALUE(ISSN[k]) * (k + 2));
1340 else if (k == MAX_ISSN)
1341 {
1342 if ((checksum % 11L) != ISSN_DIGIT_VALUE(ISSN[k]))
1343 bad_ISSN(ISSN);
1344 }
1345 } /* end for (loop over ISSN[]) */
1346
1347 if ((strlen(&ISSN[1]) != MAX_ISSN) || (nleft > 0))
1348 bad_ISSN(ISSN);
1349 }
1350
#if defined(HAVE_STDC)
static void
validate_URL(const char *the_URL, size_t n)
#else
static void
validate_URL(the_URL, n)
const char *the_URL;
size_t n;
#endif
{
    /*
    ** Issue warnings about a URL value that (a) has no "://"
    ** protocol separator, (b) has a separator that is not
    ** immediately preceded by ftp, http, or https, or (c) looks as
    ** if it contains a DOI that belongs in a separate DOI field.
    **
    ** The argument n is accepted for signature compatibility with
    ** the other validate_xxx() functions, but is not examined here.
    **
    ** NOTE(review): only the characters immediately before "://"
    ** are compared, so a longer scheme that merely ends in one of
    ** the accepted names (e.g., sftp) also passes the test.  The
    ** scheme comparison uses strncmp(), so it is case sensitive.
    */

    char *p;

    /* stristr() is presumably a case-insensitive strstr() -- confirm */
    p = stristr(the_URL, "://");

    if (p == (char *)NULL)
        warning("Expected protocol://... in URL value ``%v''");
    else
    {
        /* Each test first ensures that enough characters precede */
        /* the separator, so that the negative index stays inside */
        /* the string. */
        if ( ((p - the_URL) >= 3) && (strncmp(&p[-3], "ftp", 3) == 0) )
            /* NO-OP */ ;
        else if ( ((p - the_URL) >= 4) && (strncmp(&p[-4], "http", 4) == 0) )
            /* NO-OP */ ;
        else if ( ((p - the_URL) >= 5) && (strncmp(&p[-5], "https", 5) == 0) )
            /* NO-OP */ ;
        else
            warning("Unexpected protocol://... in URL value ``%v'': normally ftp://... or http://... or https://...");

        /* A DOI resolver host is a definite DOI; a path beginning */
        /* with "10." on a common top-level domain is only a */
        /* possible one. */
        if (stristr(the_URL, "doi.org/") != (char *)NULL)
            warning("Unexpected DOI in URL value ``%v'': move to separate DOI = \"...\" key/value in this entry");
        else if ( (stristr(the_URL, ".com/10.") != (char *)NULL) ||
                  (stristr(the_URL, ".edu/10.") != (char *)NULL) ||
                  (stristr(the_URL, ".gov/10.") != (char *)NULL) ||
                  (stristr(the_URL, ".mil/10.") != (char *)NULL) ||
                  (stristr(the_URL, ".net/10.") != (char *)NULL) ||
                  (stristr(the_URL, ".org/10.") != (char *)NULL) )
            warning("Possible DOI in URL value ``%v'': if so, move to separate DOI = \"...\" key/value in this entry");
    }
}
1389