1 /***********************************************************************
2 @C-file{
3 author = "Nelson H. F. Beebe",
4 version = "2.17",
5 date = "06 March 2021",
6 time = "18:31:33 MST",
7 filename = "isbn.c",
8 address = "University of Utah
9 Department of Mathematics, 110 LCB
10 155 S 1400 E RM 233
11 Salt Lake City, UT 84112-0090
12 USA",
13 telephone = "+1 801 581 5254",
14 FAX = "+1 801 581 4148",
15 URL = "http://www.math.utah.edu/~beebe",
16 checksum = "22111 1383 4584 35929",
17 email = "beebe@math.utah.edu, beebe@acm.org,
18 beebe@computer.org (Internet)",
19 codetable = "ISO/ASCII",
20 keywords = "bibliography, ISBN, hyphenation",
21 license = "GNU General Public License, version 2 or
22 later",
23 supported = "yes",
24 docstring = "This file contains code for hyphenating
25 International Standard Book Numbers (ISBNs),
26 using the function ISBN_hyphenate(s,t,maxs).
27 No other public objects are defined by this
28 file.
29
30 If this file is compiled with the
31 preprocessor symbol TEST defined, then a
32 standalone program, normally named bibisbn,
33 is produced that can be used to filter test
34 data containing ISBN key/value pairs
35 extracted from BibTeX files. For example,
36 the UNIX commands
37
38 bibclean -no-warn -max-width 0 *.bib | \
39 grep '^ *ISBN *=' >tmpfile
40 sed -e 's/-//g' tmpfile | bibisbn | diff tmpfile -
41
42 should display no differences in ISBN
43 numbers, except where their hyphenation was
44 originally incorrect, or missing.
45
46 The checksum field above contains a CRC-16
47 checksum as the first value, followed by the
48 equivalent of the standard UNIX wc (word
49 count) utility output of lines, words, and
50 characters. This is produced by Robert
51 Solovay's checksum utility.",
52 }
53 ***********************************************************************/
54
55 #include <config.h>
56 #include <assert.h>
57 #include "xstdbool.h"
58 #include "xstdlib.h"
59 #include "xstring.h"
60 #include "xctype.h"
61
62 RCSID("$Id: isbn.c,v 1.12 2014/04/03 18:05:23 beebe Exp beebe $")
63
64 #include "ch.h"
65 #include "isbn.h"
66 #include "yesorno.h"
67
#if !defined(MAX_ISBN_RANGE)
/* Upper bound used by this file when scanning and filling ISBN_range[]. */
#define MAX_ISBN_RANGE 2560 /* about 8 times the default size */
#endif

/* Name of the ISBN file located by do_ISBN_file(); only used in the
   diagnostics printed while that file's entries are being installed. */
static const char *ISBN_file = (const char*)NULL;

/* One row of the ISBN_range[] hyphenation table.  The code in this file
   treats a NULL begin pointer as the end of the table, and a begin string
   that starts with a hyphen as a `deleted' row. */
typedef struct
{
    const char *begin;          /* first prefix of the range, e.g., "0-387" */
    const char *end;            /* last prefix of the range */
    const char *countries;      /* country/language group names (may be "") */
}
ISBN_range_t;

#include "isbn.tbl" /* generated from awk -f isbn-el-to-bibclean-isbn.awk isbn.el */

/* True for a decimal digit or the letter X (either case). */
#define isISBNdigit(c) (Isdigit((int)(c)) || ((int)(c) == (int)'X') || ((int)(c) == (int)'x'))

/* The ISBN-13 scanner uses the same character class as the ISBN-10 one. */
#define isISBN_13digit(c) isISBNdigit(c)

#define MAX_ISBN 14 /* array size for complete ISBN and terminal NUL */
#define MAX_ISBN_13 18 /* array size for complete ISBN-13 and terminal NUL */
90
/* Stream that receives all diagnostics written by this file. */
extern FILE *stdlog;

/* Helpers defined outside this file (Strdup is defined here only when
   TEST is defined). */
/*@null@*/ extern char *findfile ARGS((/*@null@*/ const char *pathlist_, /*@null@*/ const char *name_));
extern char *get_line ARGS((FILE *fp_));
extern char *Strdup ARGS((const char *s_));
extern FILE *tfopen ARGS((const char *filename_, const char *mode_));

/* Public entry points. */
void ISBN_hyphenate ARGS((/*@out@*/ char *s_,/*@out@*/ char *t_,size_t maxs_));
void ISBN_initialize ARGS((void));

/* Maintenance of the ISBN_range[] table. */
static void add_ISBN_range ARGS((const char *the_begin,
                                 const char *the_end,
                                 const char *the_countries));
static void add_one_ISBN_range ARGS((const char *the_begin,
                                     const char *the_end,
                                     const char *the_countries,
                                     size_t where));

/* Validation, range lookup, and hyphenation helpers. */
static YESorNO is_valid_ISBN_prefix ARGS((const char *prefix));
static const char *fix_ISBN ARGS((const char *ISBN_));
static const char *hyphenate_one_ISBN ARGS((const char *prefix_,
                                            const char *ISBN_));
static const char *hyphenate_one_ISBN_13 ARGS((const char *prefix_,
                                               const char *ISBN_13_));
static int in_ISBN_range ARGS((const char *begin_,
                               const char *ISBN_,
                               const char *end_));
static YESorNO ISBN_match_country_language ARGS((const char *p1, const char *p2));
static const char *next_ISBN ARGS((const char *s_, const char **end_));
static const char *next_ISBN_13 ARGS((const char *s_, const char **end_));
static void squeeze_ISBN ARGS((char * out_ISBN_,
                               const char *in_ISBN_));
extern void warning ARGS((const char *msg_));
124
125 #if defined(TEST)
126
/* Numeric value of one ISBN character: X or x counts as 10, and anything
   else is taken to be a decimal digit. */
#define ISBN_DIGIT_VALUE(c) ((((int)(c) == (int)'X') || ((int)(c) == (int)'x')) ? 10 : \
                             ((int)(c) - (int)'0'))
                                /* correct only if digits are valid; */
                                /* the code below ensures that */

/* Size of the line and token buffers used by main() and ISBN_filter(). */
#define MAX_BUF 4096

/* In the standalone test program, diagnostics go to stderr. */
#define stdlog stderr

int main ARGS((int argc_, char* argv_[]));

void ISBN_13_hyphenate ARGS((char *s_, char *t_, size_t maxs_));
static void ISBN_filter ARGS((const char *s_));
static void ISBN_strip_hyphens ARGS((char *s_));
static YESorNO ISBN_10_valid ARGS((const char *ISBN_));
static YESorNO ISBN_13_valid ARGS((const char *ISBN_));

/* Number of stdin lines read so far by main(); printed by warning(). */
long int line_number;
145
146 #if defined(HAVE_STDC)
147 int
main(int argc,char * argv[])148 main(int argc, char* argv[])
149 #else
150 int
151 main(argc,argv)
152 int argc;
153 char* argv[];
154 #endif
155 {
156 char buf[MAX_BUF];
157 const char * help_lines[] =
158 {
159 "bibisbn [ --help ] [ --version ] [ arguments ]",
160 "",
161 "Hyphenate apparent ISBN-10 and ISBN-13 data on the command line, or stdin.",
162 "Arguments or lines that resemble Web URLs are copied verbatim.",
163 "",
164 "With one or more arguments, filter them to stdout, and do not read stdin.",
165 "",
166 "Without arguments, filter lines from stdin to stdout.",
167 "",
168 "After careful comparison of input and output streams, it may be safe to",
169 "apply this program to most BibTeX files.",
170 (const char *)NULL
171 };
172
173 line_number = 0L;
174
175 if (argc > 1) /* hyphenate command-line arguments */
176 {
177 int k, n;
178
179 n = 0;
180
181 for (k = 1; k < argc; ++k)
182 {
183 if (k > 1) /* separate arguments by single space */
184 (void)fputc(' ', stdout);
185
186 if (strcmp(argv[k], "--help") == 0)
187 {
188 int m;
189
190 for (m = 0; help_lines[m] != (const char *)NULL; ++m)
191 (void)printf("%s\n", help_lines[m]);
192
193 return (EXIT_SUCCESS);
194
195 }
196 else if (strcmp(argv[k], "--version") == 0)
197 {
198 (void)printf("bibisbn %s [%s]\n", PACKAGE_VERSION, PACKAGE_DATE);
199
200 return (EXIT_SUCCESS);
201 }
202 else
203 {
204 ISBN_filter(argv[k]);
205 ++n;
206 }
207 }
208
209 if (n > 0)
210 (void)fputs("\n", stdout);
211 }
212 else /* filter lines from stdin to stdout */
213 {
214 while (fgets(buf,MAX_BUF,stdin) != (char*)NULL)
215 {
216 ++line_number;
217 ISBN_filter(buf);
218 }
219 }
220
221 return (EXIT_SUCCESS);
222 }
223
224 /*@noreturn@*/
225 #if defined(HAVE_STDC)
226 void /* issue an error message and die */
fatal(const char * msg)227 fatal(const char *msg)
228 #else /* K&R style */
229 void
230 fatal(msg) /* issue an error message and die */
231 const char *msg;
232 #endif
233 {
234 (void)fprintf(stdlog,"%s %s\n", ERROR_PREFIX, msg);
235 exit(EXIT_FAILURE);
236 }
237
238 #if defined(HAVE_STDC)
239 static YESorNO
ISBN_10_valid(const char * ISBN_10)240 ISBN_10_valid(const char *ISBN_10)
241 #else /* K&R style */
242 static YESorNO
243 ISBN_10_valid(s)
244 const char *ISBN_10;
245 #endif
246 {
247 /* checksum algorithm adapted from validate_ISBN() in chek.c */
248
249 int checksum, k;
250 YESorNO result;
251 static const int max_isbn_10 = 10;
252
253 for (checksum = 0, k = 1, result = NO; ISBN_10[k - 1] != '\0'; ++k)
254 {
255 if (k < max_isbn_10)
256 checksum += ISBN_DIGIT_VALUE(ISBN_10[k - 1]) * k;
257 else if (k == max_isbn_10)
258 {
259 if ((checksum % 11) == ISBN_DIGIT_VALUE(ISBN_10[k - 1]))
260 result = YES;
261 }
262 } /* end for (loop over ISBN_10[]) */
263
264 return (result);
265 }
266
267 #if defined(HAVE_STDC)
268 static YESorNO
ISBN_13_valid(const char * ISBN_13)269 ISBN_13_valid(const char *ISBN_13)
270 #else /* K&R style */
271 static YESorNO
272 ISBN_13_valid(ISBN_13)
273 const char *ISBN_13;
274 #endif
275 {
276 int checksum, k;
277 YESorNO result;
278 static const int max_isbn_13 = 13;
279
280 for (checksum = 0, k = 1, result = NO; ISBN_13[k - 1] != '\0'; ++k)
281 {
282 size_t weight;
283
284 weight = (k & 1) ? 1 : 3;
285
286 if (k < max_isbn_13)
287 checksum += ISBN_DIGIT_VALUE(ISBN_13[k - 1]) * weight ;
288 else if (k == max_isbn_13)
289 {
290 size_t digit_13, rem;
291
292 rem = checksum % 10;
293 digit_13 = (rem == 0) ? 0 : (10 - rem);
294
295 if (digit_13 == ISBN_DIGIT_VALUE(ISBN_13[k - 1]))
296 result = YES;
297 }
298 } /* end for (loop over ISBN_13[]) */
299
300 return (result);
301 }
302
303 #if defined(HAVE_STDC)
304 static void
ISBN_filter(const char * s)305 ISBN_filter(const char *s)
306 #else /* K&R style */
307 static void
308 ISBN_filter(s)
309 const char *s;
310 #endif
311 {
312 /*
313 ** Tokenize s, writing non-ISBN data verbatim to stdout, and
314 ** hyphenating (if possible) ISBN data. However, make an
315 ** exception for URL-like data in s[], and just write that
316 ** verbatim. That way, for many BibTeX files, this program can be
317 ** safely applied to the entire file. However, caution is
318 ** advised: carefully compare input and output before replacing
319 ** the input with the output!
320 */
321
322 if (strstr(s, "://") != (const char *)NULL)
323 (void)fputs(s, stdout); /* preserve URLs */
324 else
325 {
326 while (*s)
327 {
328 if (isdigit(*s))
329 {
330 char buf3[MAX_BUF];
331 char buf2[MAX_BUF];
332 char buf[MAX_BUF];
333 char msg[100];
334 size_t m, n;
335
336 for (m = 0; (m < (sizeof(buf) - 1)) && (isdigit(*s) || (*s == '-') || (*s == 'X') || (*s == 'x')); ++s)
337 {
338 buf[m++] = *s;
339 }
340
341 buf[m] = '\0';
342
343 (void)strcpy(buf3, buf);
344 ISBN_strip_hyphens(buf3);
345 n = strlen(buf3);
346
347 if (n == 10)
348 {
349 if (ISBN_10_valid(buf3) == YES)
350 {
351 (void)strcpy(buf, buf3);
352 ISBN_hyphenate(buf, buf2, MAX_BUF);
353 }
354 else
355 {
356 (void)snprintf(msg, sizeof(msg), "invalid ISBN-10 value [%.10s]", buf3);
357 warning(msg);
358 }
359 }
360 else if (n == 13)
361 {
362 if (ISBN_13_valid(buf3) == YES)
363 {
364 (void)strcpy(buf, buf3);
365 ISBN_13_hyphenate(buf, buf2, MAX_BUF);
366 }
367 else
368 {
369 (void)snprintf(msg, sizeof(msg), "invalid ISBN-13 value [%.13s]", buf3);
370 warning(msg);
371 }
372 }
373
374 n = strlen(buf);
375
376 if ((strncmp(buf, "978", 3) == 0) && (n == 16))
377 {
378 (void)fputs("978-", stdout);
379 (void)fputs(&buf[3], stdout);
380 }
381 else if ((strncmp(buf, "979", 3) == 0) && (n == 16))
382 {
383 (void)fputs("979-", stdout);
384 (void)fputs(&buf[3], stdout);
385 }
386 else
387 (void)fputs(buf,stdout);
388 }
389 else
390 {
391 (void)fputc(*s, stdout);
392 ++s;
393 }
394 }
395 }
396 }
397
#if defined(HAVE_STDC)
static void
ISBN_strip_hyphens(char *s)
#else /* K&R style */
static void
ISBN_strip_hyphens(s)
char *s;
#endif
{
    /* Delete every hyphen from s[] in place, closing up the gaps so
       that the remaining characters keep their original order. */

    const char *src = s;        /* next character to examine */
    char *dst = s;              /* next position to fill */

    while (*src != '\0')
    {
        if (*src != '-')
        {
            *dst = *src;
            ++dst;
        }

        ++src;
    }

    *dst = '\0';                /* terminate the shortened string */
}
417
418
#if defined(HAVE_STDC)
char*
Strdup(const char *s)
#else /* K&R style */
char*
Strdup(s)
const char *s;
#endif
{
    /* Return a freshly-allocated copy of s; if memory is exhausted,
       report that through fatal() instead. */

    size_t size;
    char *copy;

    size = strlen(s) + 1;       /* include room for the NUL */
    copy = (char*)malloc(size);

    if (copy == (char*)NULL)
        fatal("Out of string memory");

    return (strcpy(copy, s));
}
434
435
436 #if defined(HAVE_STDC)
437 void
warning(const char * msg)438 warning(const char *msg) /* issue a warning message to stdlog */
439 #else /* K&R style */
440 void
441 warning(msg) /* issue a warning message to stdlog */
442 const char *msg;
443 #endif
444 {
445 (void)fprintf(stdlog,"%s:%ld:%s.\n", "-", line_number, msg);
446 (void)fflush(stdlog);
447 }
448
449 #endif /* defined(TEST) */
450
451
452 #if defined(HAVE_STDC)
453 static void
add_ISBN_range(const char * the_begin,const char * the_end,const char * the_countries)454 add_ISBN_range(const char *the_begin, const char *the_end,
455 const char *the_countries)
456 #else /* K&R style */
457 static void
458 add_ISBN_range(the_begin, the_end, the_countries)
459 const char *the_begin;
460 const char *the_end;
461 const char *the_countries;
462 #endif
463 {
464 /* Search the ISBN_range[] table circularly from the last search
465 position for the next non-empty slot matching the_begin, and
466 install the new triple (the_begin,the_end,the_countries) there.
467 Otherwise, add the triple at the end, if enough space remains. */
468
469 static int error_count = 0;
470 size_t k;
471 static size_t start = (size_t) 0;
472
473 /* Silently ignore invalid begin/end pairs */
474
475 if (the_begin == (const char *)NULL)
476 return;
477 else if (the_end == (const char *)NULL)
478 return;
479
480 if (the_begin[0] == '-')
481 start = 0; /* because deletions must always find the first match */
482
483 for (k = start;
484 (k < MAX_ISBN_RANGE) && (ISBN_range[k].begin != (const char *)NULL);
485 ++k)
486 {
487 if (ISBN_range[k].begin[0] == '-')
488 {
489 if (STREQUAL(ISBN_range[k].begin,the_begin))
490 { /* then already deleted this one */
491 start = k;
492 return;
493 }
494 else
495 continue; /* ignore `deleted' entries */
496 }
497 else if ((the_begin[0] == '-') && STREQUAL(ISBN_range[k].begin, the_begin + 1))
498 { /* then `delete' this entry by changing its begin prefix to start with a hyphen */
499 ISBN_range[k].begin = Strdup(the_begin);
500 start = k;
501 return;
502 }
503 else if (STREQUAL(ISBN_range[k].begin, the_begin))
504 {
505 add_one_ISBN_range(the_begin, the_end, the_countries, k);
506 start = k;
507 return;
508 }
509 }
510
511 /* If we fell through, then restart the search in the beginning of the table */
512
513 for (k = 0;
514 (k < start) && (ISBN_range[k].begin != (const char *)NULL); ++k)
515 {
516 if (ISBN_range[k].begin[0] == '-')
517 {
518 if (STREQUAL(ISBN_range[k].begin,the_begin))
519 { /* then already deleted this one */
520 start = k;
521 return;
522 }
523 else
524 continue; /* ignore `deleted' entries */
525 }
526 else if ((the_begin[0] == '-') && STREQUAL(ISBN_range[k].begin, the_begin + 1))
527 { /* then `delete' this entry by changing its begin prefix to start with a hyphen */
528 ISBN_range[k].begin = Strdup(the_begin);
529 start = k;
530 return;
531 }
532 else if (STREQUAL(ISBN_range[k].begin, the_begin))
533 {
534 add_one_ISBN_range(the_begin, the_end, the_countries, k);
535 start = k;
536 return;
537 }
538 }
539
540 /* If we fell through, then add the new entry at the first deleted
541 entry, or after the last used entry */
542 for (k = 0;
543 ((k < MAX_ISBN_RANGE) &&
544 (ISBN_range[k].begin != (const char *)NULL) &&
545 (ISBN_range[k].begin[0] != '\0'));
546 ++k)
547 continue;
548
549 if (k < (MAX_ISBN_RANGE - 1)) /* then have space to store this new entry */
550 {
551 start = k;
552 add_one_ISBN_range(the_begin, the_end, the_countries, k);
553 }
554 else if (++error_count == 1) /* no more than one error message */
555 (void)fprintf(stdlog,
556 "More than %lu ISBN ranges fills internal table\n",
557 (unsigned long)MAX_ISBN_RANGE);
558 }
559
560
561 #if defined(HAVE_STDC)
562 static void
add_one_ISBN_range(const char * the_begin,const char * the_end,const char * the_countries,size_t where)563 add_one_ISBN_range(const char *the_begin, const char *the_end,
564 const char *the_countries, size_t where)
565 #else /* K&R style */
566 static void
567 add_one_ISBN_range(the_begin, the_end, the_countries, where)
568 const char *the_begin;
569 const char *the_end;
570 const char *the_countries;
571 size_t where;
572 #endif
573 { /* add an entry at slot where, without bounds checking, but with
574 valid-value checking */
575 #define FMT_INVALID "Invalid country/language-publisher ISBN prefix [%s] in ISBN file [%s]\n"
576
577 if ((the_begin != (const char*)NULL) && (is_valid_ISBN_prefix(the_begin) == NO))
578 {
579 (void)fprintf(stdlog, FMT_INVALID, the_begin, ISBN_file);
580 return;
581 }
582 else if ((the_end != (const char*)NULL) && (is_valid_ISBN_prefix(the_end) == NO))
583 {
584 (void)fprintf(stdlog, FMT_INVALID, the_end, ISBN_file);
585 return;
586 }
587 else if ((the_begin != (const char*)NULL) && (the_end != (const char*)NULL) &&
588 STRGREATER(the_begin,the_end))
589 {
590 (void)fprintf(stdlog,
591 "Non-increasing country/language-publisher ISBN range [%s .. %s] in ISBN file [%s]\n",
592 the_begin, the_end, ISBN_file);
593 return;
594 }
595
596 FREE(ISBN_range[where].begin);
597 FREE(ISBN_range[where].end);
598 FREE(ISBN_range[where].countries);
599
600 if (the_begin == (const char *)NULL) /* sanity check for assumptions elsewhere in bibclean */
601 {
602 assert(the_end == (const char *)NULL);
603 assert(the_countries == (const char *)NULL);
604 }
605 else
606 {
607 if (the_countries == (const char *)NULL)
608 the_countries = "";
609
610 assert(the_end != (const char *)NULL);
611 assert(the_countries != (const char *)NULL);
612 }
613
614 ISBN_range[where].begin = (the_begin == (const char *)NULL) ? the_begin :
615 Strdup(the_begin);
616 ISBN_range[where].end = (the_end == (const char *)NULL) ? the_end :
617 Strdup(the_end);
618 ISBN_range[where].countries = (the_countries == (const char *)NULL) ? the_countries :
619 Strdup(the_countries);
620
621 #undef FMT_INVALID
622 }
623
624
625 #if !defined(TEST)
626 #if defined(HAVE_STDC)
627 void
do_ISBN_file(const char * pathlist,const char * name)628 do_ISBN_file(/*@null@*/ const char *pathlist, /*@null@*/ const char *name)
629 #else /* K&R style */
630 void
631 do_ISBN_file(pathlist,name)
632 /*@null@*/ const char *pathlist;
633 /*@null@*/ const char *name;
634 #endif
635 {
636 FILE *fp;
637 char *p;
638
639 if (name == (const char*)NULL)
640 return;
641
642 if ((ISBN_file = findfile(pathlist,name)) == (char*)NULL)
643 return; /* silently ignore missing files */
644
645 if ((fp = tfopen(ISBN_file,"r")) == (FILE*)NULL)
646 return; /* silently ignore missing files */
647
648 /* The ISBN file is expected to look like the output of
649 -print-ISBN-table: lines are (1) blank or empty, (2) comments
650 from percent to end-of-line, (3) pairs of whitespace-separated
651 (begin-prefix, end-prefix) values, or (4) triples of
652 whitespace-separated (begin-prefix, end-prefix values, countries).
653 In the latter case, the countries continue to end-of-line or a
654 comment character, whichever comes first, and may include
655 blanks. */
656 while ((p = get_line(fp)) != (char*)NULL)
657 {
658 #define TOKEN_SEPARATORS " \t"
659 const char *the_begin;
660 const char *the_end;
661 const char *the_countries;
662 char *comment;
663
664 comment = strchr(p, BIBTEX_COMMENT_PREFIX);
665 if (comment != (const char*)NULL)
666 *comment = '\0'; /* then discard comment text */
667
668 the_begin = strtok(p, TOKEN_SEPARATORS);
669 if (the_begin == (const char*)NULL)
670 continue; /* ignore blank or empty lines */
671 if (*the_begin == (char)BIBTEX_COMMENT_PREFIX)
672 continue; /* ignore comment lines */
673 the_end = strtok((char*)NULL, TOKEN_SEPARATORS);
674 if (the_end == (const char*)NULL)
675 {
676 (void)fprintf(stdlog,"Expected end-prefix after begin-prefix [%s] in ISBN file [%s]\n",
677 the_begin, ISBN_file);
678 continue;
679 }
680 the_countries = strtok((char*)NULL, "");
681 if (the_countries != (const char*)NULL)
682 { /* skip over leading space */
683 while (Isspace((int)*the_countries))
684 ++the_countries;
685 }
686 if ((the_countries != (const char*)NULL) && (*the_countries == '\0'))
687 the_countries = (const char*)NULL;
688 #if defined(DEBUG)
689 (void)fprintf(stdlog,
690 "DEBUG:\t[%s]\t[%s]\t[%s]\t[%s]\n",
691 ISBN_file,
692 the_begin,
693 the_end,
694 ((the_countries == (const char*)NULL) ? "" : the_countries));
695 #endif
696 add_ISBN_range(the_begin, the_end, the_countries);
697 }
698 (void)fclose(fp);
699 #undef TOKEN_SEPARATORS
700 }
701
702 #endif /* !defined(TEST) */
703
704
705 void
do_print_ISBN_table(VOID)706 do_print_ISBN_table(VOID)
707 {
708 size_t k;
709
710 /* For brevity and readability, we output the country/language
711 group prefix only when it changes, preceded by pair of newlines. */
712
713 (void)fprintf(stdlog, "%%%%%% ISBN ranges and country/language groups\n");
714 for (k = 0; (ISBN_range[k].begin != (const char *)NULL); ++k)
715 {
716 const char *country_names;
717
718 assert(ISBN_range[k].end != (const char *)NULL); /* sanity check on ISBN_range[] table logic */
719 assert(ISBN_range[k].countries != (const char *)NULL);
720
721 if (k == 0)
722 {
723 if (ISBN_range[k].countries[0] == '\0')
724 country_names = (const char *)NULL;
725 else
726 country_names = ISBN_range[k].countries;
727 }
728 else if (STREQUAL(ISBN_range[k-1].countries,ISBN_range[k].countries) &&
729 (ISBN_match_country_language(ISBN_range[k-1].begin,ISBN_range[k].begin) == YES))
730 country_names = (const char *)NULL;
731 else if (ISBN_range[k].countries[0] == '\0')
732 country_names = (const char *)NULL;
733 else
734 country_names = ISBN_range[k].countries;
735
736 /* We intentionally include `deleted' entries (beginning with a hyphen), so
737 as not to conceal information from the user. */
738 (void)fprintf(stdlog, "%s%-11s\t%-11s%s%s\n",
739 ((country_names == (const char *)NULL) ? "" : "\n\n"),
740 ISBN_range[k].begin,
741 ISBN_range[k].end,
742 ((country_names == (const char *)NULL) ? "" : "\t"),
743 ((country_names == (const char *)NULL) ? "" : country_names));
744 }
745 }
746
747
748 #if defined(HAVE_STDC)
749 static const char *
fix_ISBN(const char * ISBN)750 fix_ISBN(const char *ISBN)
751 #else /* K&R style */
752 static const char *
753 fix_ISBN(ISBN)
754 const char *ISBN;
755 #endif
756 {
757 size_t k;
758
759 for (k = 0; (ISBN_range[k].begin != (const char*)NULL); ++k)
760 {
761 if (ISBN_range[k].begin[0] == '-')
762 continue; /* ignored `deleted' entries */
763 if (in_ISBN_range(ISBN_range[k].begin, ISBN, ISBN_range[k].end)
764 == 0)
765 return (hyphenate_one_ISBN(ISBN_range[k].begin, ISBN));
766 }
767 return ((const char*)NULL);
768 }
769
770
771 #if defined(HAVE_STDC)
772 static const char *
fix_ISBN_13(const char * ISBN_13)773 fix_ISBN_13(const char *ISBN_13)
774 #else /* K&R style */
775 static const char *
776 fix_ISBN_13(ISBN_13)
777 const char *ISBN_13;
778 #endif
779 {
780 size_t k;
781
782 for (k = 0; (ISBN_range[k].begin != (const char*)NULL); ++k)
783 {
784 if (ISBN_range[k].begin[0] == '-')
785 continue; /* ignored `deleted' entries */
786 if (in_ISBN_range(ISBN_range[k].begin, &ISBN_13[3], ISBN_range[k].end)
787 == 0)
788 return (hyphenate_one_ISBN_13(ISBN_range[k].begin, ISBN_13));
789 }
790 return ((const char*)NULL);
791 }
792
793
/* Advance the pointer variable p to the next ISBN digit (a decimal digit
   or X/x), or to the terminating NUL, whichever comes first.  The macro
   expands to a bare while statement, so it must be used as a complete
   statement followed by a semicolon.  The parameter is parenthesized so
   that any lvalue expression may safely be supplied for p. */
#define skip_non_ISBN_digit(p) while ((*(p) != '\0') && !isISBNdigit((int)*(p))) (p)++

/* ISBN-13 scanning skips exactly the same characters. */
#define skip_non_ISBN_13_digit(p) skip_non_ISBN_digit(p)
797
798 void
free_ISBN_table(VOID)799 free_ISBN_table(VOID)
800 {
801 int k; /* index into ISBN_range[] */
802
803 for (k = 0; k < MAX_ISBN_RANGE; ++k)
804 {
805 if (ISBN_range[k].begin != (const char *)NULL)
806 {
807 FREE(ISBN_range[k].begin);
808 ISBN_range[k].begin = (const char *)NULL;
809 }
810
811 if (ISBN_range[k].end != (const char *)NULL)
812 {
813 FREE(ISBN_range[k].end);
814 ISBN_range[k].end = (const char *)NULL;
815 }
816
817 if (ISBN_range[k].countries != (const char *)NULL)
818 {
819 FREE(ISBN_range[k].countries);
820 ISBN_range[k].countries = (const char *)NULL;
821 }
822 }
823 }
824
825
#if defined(HAVE_STDC)
static const char *
hyphenate_one_ISBN(const char *prefix, const char *ISBN)
#else /* K&R style */
static const char *
hyphenate_one_ISBN(prefix,ISBN)
const char *prefix;
const char *ISBN;
#endif
{
    /*******************************************************************
      Given a countrygroupnumber-publishernumber prefix, and an ISBN
      optionally containing spaces and hyphens, return a pointer to an
      unmodifiable properly-hyphenated ISBN stored in an internal buffer
      that is overwritten on subsequent calls, or NULL if the correct
      number of ISBN digits is not found.

      The input ISBN can contain optional leading and trailing text,
      such as a line from a BibTeX .bib file, like this:

      ISBN = "0-387-09823-2 (paperback)",

      Only the SHAPE of prefix is used here: each non-hyphen character
      of prefix consumes one digit of ISBN, and each hyphen of prefix
      is copied to the output.  The digits of prefix are not compared
      with those of ISBN.

    ******************************************************************/

    static char new_ISBN[MAX_ISBN];
    int k;                      /* number of output characters stored */

    skip_non_ISBN_digit(ISBN);

    /* Collect up to MAX_ISBN - 2 characters: the ISBN digits, the
       hyphen(s) from prefix, and the hyphen that follows prefix. */

    for (k = 0; (*ISBN != '\0') && (k < (MAX_ISBN - 2)); )
    {
        if (*prefix == '-')
        {
            new_ISBN[k++] = '-';
            prefix++;
        }
        else if (*prefix != '\0')
        {
            skip_non_ISBN_digit(ISBN);
            if (*ISBN == '\0')
                break;
            new_ISBN[k++] = *ISBN++;
            prefix++;
            /* the end of prefix is always followed by a hyphen */
            if ((*prefix == '\0') && (k < MAX_ISBN))
                new_ISBN[k++] = '-';
        }
        else /* past prefix */
        {
            skip_non_ISBN_digit(ISBN);
            if (*ISBN == '\0')
                break;
            new_ISBN[k++] = *ISBN++;
        }
    }

    /* Succeed only if the buffer was filled exactly, and the input
       does not continue with yet another ISBN digit. */

    if ((k == (MAX_ISBN - 2)) && !isISBNdigit(*ISBN))
    {
        new_ISBN[(MAX_ISBN - 2)] = new_ISBN[(MAX_ISBN - 3)];
        /* move checksum digit to end */
        new_ISBN[(MAX_ISBN - 3)] = '-'; /* prefix it with a hyphen */
        new_ISBN[(MAX_ISBN - 1)] = '\0'; /* terminate the string */
        return ((const char*)&new_ISBN[0]);
    }
    else
        return ((const char*)NULL);
}
891
892
#if defined(HAVE_STDC)
static const char *
hyphenate_one_ISBN_13(const char *prefix, const char *ISBN_13)
#else /* K&R style */
static const char *
hyphenate_one_ISBN_13(prefix,ISBN_13)
const char *prefix;
const char *ISBN_13;
#endif
{

    /*******************************************************************
      Given an ISBN-10 countrygroupnumber-publishernumber prefix, and
      an ISBN-13 optionally containing spaces and hyphens, return a
      pointer to an unmodifiable properly-hyphenated ISBN-13 stored in
      an internal buffer that is overwritten on subsequent calls, or
      NULL if the correct number of ISBN-13 digits is not found.

      The input ISBN-13 can contain optional leading and trailing text,
      such as a line from a BibTeX .bib file, like this:

      ISBN-13 = "978-0-387-09823-4 (paperback)",

      Only the SHAPE of prefix is used here: each non-hyphen character
      of prefix consumes one digit of ISBN_13, and each hyphen of
      prefix is copied to the output.  The digits of prefix are not
      compared with those of ISBN_13.

    ******************************************************************/

    static char new_ISBN_13[MAX_ISBN_13];
    int k;                      /* number of output characters stored */

    skip_non_ISBN_13_digit(ISBN_13);

    /* Collect up to MAX_ISBN_13 - 2 characters: the 978 or 979 lead-in
       and its hyphen, the remaining digits, the hyphen(s) from prefix,
       and the hyphen that follows prefix. */

    for (k = 0; (*ISBN_13 != '\0') && (k < (MAX_ISBN_13 - 2)); )
    {
        if (k == 0)
        {
            /* While the output is still empty, copy the three-digit
               lead-in and follow it with a hyphen; otherwise, issue a
               warning and continue with the prefix handling below. */
            if ( (strncmp("978", ISBN_13, 3) == 0) ||
                 (strncmp("979", ISBN_13, 3) == 0) )
            {
                new_ISBN_13[k++] = *ISBN_13++;
                new_ISBN_13[k++] = *ISBN_13++;
                new_ISBN_13[k++] = *ISBN_13++;
                new_ISBN_13[k++] = '-';
            }
            else
                warning("ISBN-13 must begin with either 978 or 979: ``%v''");
        }

        if (*prefix == '-')
        {
            new_ISBN_13[k++] = '-';
            prefix++;
        }
        else if (*prefix != '\0')
        {
            skip_non_ISBN_13_digit(ISBN_13);
            if (*ISBN_13 == '\0')
                break;
            new_ISBN_13[k++] = *ISBN_13++;
            prefix++;
            /* the end of prefix is always followed by a hyphen */
            if ((*prefix == '\0') && (k < MAX_ISBN_13))
                new_ISBN_13[k++] = '-';
        }
        else /* past prefix */
        {
            skip_non_ISBN_13_digit(ISBN_13);
            if (*ISBN_13 == '\0')
                break;
            new_ISBN_13[k++] = *ISBN_13++;
        }
    }

    /* Succeed only if the buffer was filled exactly, and the input
       does not continue with yet another ISBN digit. */

    if ((k == (MAX_ISBN_13 - 2)) && !isISBN_13digit(*ISBN_13))
    {
        new_ISBN_13[(MAX_ISBN_13 - 2)] = new_ISBN_13[(MAX_ISBN_13 - 3)];
        /* move checksum digit to end */
        new_ISBN_13[(MAX_ISBN_13 - 3)] = '-'; /* prefix it with a hyphen */
        new_ISBN_13[(MAX_ISBN_13 - 1)] = '\0'; /* terminate the string */
        return ((const char*)&new_ISBN_13[0]);
    }
    else
        return ((const char*)NULL);
}
973
974
975 #if defined(HAVE_STDC)
976 static int
in_ISBN_range(const char * begin,const char * ISBN,const char * end)977 in_ISBN_range(const char *begin, const char *ISBN, const char *end)
978 #else /* K&R style */
979 static int
980 in_ISBN_range(begin,ISBN,end)
981 const char *begin;
982 const char *ISBN;
983 const char *end;
984 #endif
985 {
986 /* Compare the countrygroupnumber-publishernumber part of ISBN
987 against the range (begin, end), and return -1 (less than),
988 0 (in range), or +1 (greater than). */
989
990 char begin_prefix[MAX_ISBN];
991 char end_prefix[MAX_ISBN];
992 char ISBN_prefix[MAX_ISBN];
993
994 squeeze_ISBN(begin_prefix, begin);
995 squeeze_ISBN(ISBN_prefix,ISBN);
996
997 if (strncmp(ISBN_prefix,begin_prefix,strlen(begin_prefix)) < 0)
998 return (-1);
999
1000 squeeze_ISBN(end_prefix,end);
1001 if (strncmp(end_prefix,ISBN_prefix,strlen(end_prefix)) < 0)
1002 return (1);
1003
1004 return (0);
1005 }
1006
1007
1008 #if defined(HAVE_STDC)
1009 static YESorNO
is_valid_ISBN_prefix(const char * prefix)1010 is_valid_ISBN_prefix(const char *prefix)
1011 #else /* K&R style */
1012 static YESorNO
1013 is_valid_ISBN_prefix(prefix)
1014 const char *prefix;
1015 #endif
1016 {
1017 /* Return YES if prefix matches "^[0-9]+-[0-9]+$" and has a length
1018 < 10, and else, NO */
1019 int n;
1020 int len;
1021
1022 for (len = 0, n = 0; Isdigit((int)*prefix); ++prefix)
1023 (len++, n++);
1024 if (n == 0)
1025 return (NO);
1026
1027 if (*prefix != '-')
1028 return (NO);
1029 prefix++;
1030 len++;
1031
1032 for (n = 0; Isdigit((int)*prefix); ++prefix)
1033 (len++, n++);
1034 if (n == 0)
1035 return (NO);
1036
1037 if (*prefix != '\0')
1038 return (NO);
1039 if (len >= 10) /* longest possible is 9999999-9[-9-9] */
1040 return (NO);
1041
1042 return (YES);
1043 }
1044
1045
#if defined(HAVE_STDC)
void
ISBN_hyphenate(/*@out@*/ char *s, /*@out@*/ char *t, size_t maxs)
#else /* K&R style */
void
ISBN_hyphenate(s,t,maxs)
/*@out@*/ char *s;
/*@out@*/ char *t;
size_t maxs;
#endif
{
    /* Given a string s[] containing one or more ISBNs, rewrite the */
    /* string in-place with correct ISBN hyphenation.  Up to maxs-1 */
    /* non-NUL characters of s[] may be used.  t[] is workspace, at */
    /* least as large as s[].  If insufficient workspace is */
    /* available, s[] is returned unchanged. */

    const char *scan;           /* current candidate ISBN in s[] */
    const char *pending;        /* first character of s[] not yet copied */
    const char *after;          /* set by next_ISBN() for the candidate */
    const char *fixed;          /* hyphenated candidate, or NULL */

    t[0] = '\0';
    pending = s;
    scan = s;

    for (;;)
    {
        size_t gap;

        scan = next_ISBN(scan,&after);
        if (scan == (const char*)NULL)
            break;              /* no more candidates */

        /* copy the text that precedes the candidate */
        gap = (size_t)(scan - pending);
        if ((strlen(t) + gap) >= maxs)
            return;             /* insufficient space: premature return */
        (void)strncat(t,pending,gap);

        fixed = fix_ISBN(scan);
        if (fixed == (char*)NULL)
        {
            /* not hyphenatable: copy one character, and resume the
               search just after it */
            if ((strlen(t) + 1) >= maxs)
                return;         /* insufficient space: premature return */
            (void)strncat(t,scan,1);
            ++scan;
        }
        else
        {
            if ((strlen(t) + strlen(fixed)) >= maxs)
                return;         /* insufficient space: premature return */
            (void)strcat(t,fixed);
            scan = after;
        }

        pending = scan;
    }

    /* copy the text that follows the last candidate */
    if ((strlen(t) + strlen(pending)) >= maxs)
        return;                 /* insufficient space: premature return */
    (void)strcat(t,pending);
    (void)strcpy(s,t);
}
1105
1106
#if defined(HAVE_STDC)
void
ISBN_13_hyphenate(/*@out@*/ char *s, /*@out@*/ char *t, size_t maxs)
#else /* K&R style */
void
ISBN_13_hyphenate(s,t,maxs)
/*@out@*/ char *s;
/*@out@*/ char *t;
size_t maxs;
#endif
{
    /* Given a string s[] containing one or more ISBN_13s, rewrite the */
    /* string in-place with correct ISBN_13 hyphenation.  Up to maxs-1 */
    /* non-NUL characters of s[] may be used.  t[] is workspace, at */
    /* least as large as s[].  If insufficient workspace is */
    /* available, s[] is returned unchanged. */

    const char *scan;           /* current candidate ISBN-13 in s[] */
    const char *pending;        /* first character of s[] not yet copied */
    const char *after;          /* set by next_ISBN_13() for the candidate */
    const char *fixed;          /* hyphenated candidate, or NULL */

    t[0] = '\0';
    pending = s;
    scan = s;

    for (;;)
    {
        size_t gap;

        scan = next_ISBN_13(scan,&after);
        if (scan == (const char*)NULL)
            break;              /* no more candidates */

        /* copy the text that precedes the candidate */
        gap = (size_t)(scan - pending);
        if ((strlen(t) + gap) >= maxs)
            return;             /* insufficient space: premature return */
        (void)strncat(t,pending,gap);

        fixed = fix_ISBN_13(scan);
        if (fixed == (char*)NULL)
        {
            /* not hyphenatable: copy one character, and resume the
               search just after it */
            if ((strlen(t) + 1) >= maxs)
                return;         /* insufficient space: premature return */
            (void)strncat(t,scan,1);
            ++scan;
        }
        else
        {
            if ((strlen(t) + strlen(fixed)) >= maxs)
                return;         /* insufficient space: premature return */
            (void)strcat(t,fixed);
            scan = after;
        }

        pending = scan;
    }

    /* copy the text that follows the last candidate */
    if ((strlen(t) + strlen(pending)) >= maxs)
        return;                 /* insufficient space: premature return */
    (void)strcat(t,pending);
    (void)strcpy(s,t);
}
1166
1167
1168 void
ISBN_initialize(VOID)1169 ISBN_initialize(VOID)
1170 {
1171 size_t k;
1172
1173 /* Reallocate static-string entries in the ISBN_range[] table,
1174 so that we can later free them on exit, and avoid complaints
1175 about memory leaks. Logic elsewhere in bibclean requires
1176 that if the begin pointer is non--NULL, then the end and
1177 countries pointers are as well. */
1178
1179 for (k = 0; (ISBN_range[k].begin != (const char *)NULL); ++k)
1180 {
1181 assert(ISBN_range[k].end != (const char *)NULL);
1182 assert(ISBN_range[k].countries != (const char *)NULL);
1183
1184 ISBN_range[k].begin = Strdup(ISBN_range[k].begin);
1185 ISBN_range[k].end = Strdup(ISBN_range[k].end);
1186 ISBN_range[k].countries = Strdup(ISBN_range[k].countries);
1187 }
1188
1189 for (; k < MAX_ISBN_RANGE; ++k) /* fill rest of table with NULL pointers */
1190 {
1191 ISBN_range[k].begin = (const char *)NULL;
1192 ISBN_range[k].end = (const char *)NULL;
1193 ISBN_range[k].countries = (const char *)NULL;
1194 }
1195
1196 /* Check the consistency of the ISBN_range[] table, and then
1197 modify its compile-time setting so that all entries are
1198 guaranteed to have non-NULL countries. We need to ensure this,
1199 because an "-ISBN-file filename" option can `delete' table
1200 entries (by resetting the begin string to start with a hyphen). */
1201
1202 for (k = 0; (ISBN_range[k].begin != (const char *)NULL); ++k)
1203 {
1204 if (ISBN_range[k].end == (const char*)NULL)
1205 {
1206 (void)fprintf(stdlog,
1207 "Illegal ISBN range end [%s .. NULL]\n",
1208 ISBN_range[k].begin);
1209 ISBN_range[k].end = "";
1210 }
1211
1212 #define FMT_INVALID "Invalid country/language-publisher ISBN prefix [%s]\n"
1213
1214 if (is_valid_ISBN_prefix(ISBN_range[k].begin) == NO)
1215 {
1216 (void)fprintf(stdlog, FMT_INVALID, ISBN_range[k].begin);
1217 ISBN_range[k].begin = "";
1218 }
1219
1220 if (is_valid_ISBN_prefix(ISBN_range[k].end) == NO)
1221 {
1222 (void)fprintf(stdlog, FMT_INVALID, ISBN_range[k].end);
1223 ISBN_range[k].end = "";
1224 }
1225
1226 #undef FMT_INVALID
1227
1228 if (STRGREATER(ISBN_range[k].begin, ISBN_range[k].end))
1229 {
1230 (void)fprintf(stdlog,
1231 "Non-increasing country/language-publisher ISBN range [%s .. %s] deleted\n",
1232 ISBN_range[k].begin, ISBN_range[k].end);
1233 ISBN_range[k].begin = ISBN_range[k].end = "";
1234 }
1235
1236 if (ISBN_range[k].countries == (const char *)NULL)
1237 {
1238 if ((k == 0) ||
1239 (ISBN_match_country_language(ISBN_range[k-1].begin,ISBN_range[k].begin) == NO))
1240 {
1241 (void)fprintf(stdlog,
1242 "Missing country names for ISBN range [%s .. %s]\n",
1243 ISBN_range[k].begin, ISBN_range[k].end);
1244 ISBN_range[k].countries = Strdup("");
1245 }
1246 else if (ISBN_match_country_language(ISBN_range[k-1].begin,ISBN_range[k].begin) == YES)
1247 ISBN_range[k].countries = Strdup(ISBN_range[k - 1].countries);
1248 else
1249 ISBN_range[k].countries = Strdup("");
1250 }
1251 }
1252 }
1253
1254
1255 #if defined(HAVE_STDC)
1256 static YESorNO
ISBN_match_country_language(const char * p1,const char * p2)1257 ISBN_match_country_language(const char *p1, const char *p2)
1258 #else /* K&R style */
1259 static YESorNO
1260 ISBN_match_country_language(p1, p2)
1261 const char *p1;
1262 const char *p2;
1263 #endif
1264 {
1265 /* Return YES if the country/language prefixes of p1 and p2 match, else NO */
1266
1267 size_t k;
1268
1269 if ((p1 == (const char *)NULL) || (p2 == (const char *)NULL))
1270 return (NO);
1271
1272 for (k = 0; (p1[k] != '\0') && (p2[k] != '\0'); ++k)
1273 {
1274 if (p1[k] != p2[k])
1275 return (NO);
1276 else if (p1[k] == '-')
1277 return (YES);
1278 }
1279
1280 return (NO);
1281 }
1282
1283
#if defined(HAVE_STDC)
static const char *
next_ISBN(const char *s,const char **next)
#else /* K&R style */
static const char *
next_ISBN(s,next)
const char *s;
const char **next;
#endif
{
    /* Find the first run in s[] that contains 10 ISBN digits (as     */
    /* judged by isISBNdigit()), where the digits may be separated by */
    /* spaces or hyphens but by nothing else.                         */
    /*                                                                */
    /* On success, return a pointer to the first digit of the run and */
    /* set *next to the character following the tenth digit.  When    */
    /* the end of s[] is reached first, set *next to NULL and return  */
    /* NULL.                                                          */

    const char *first = (const char*)NULL; /* first digit of current run */
    size_t ndigits = 0;		/* digits seen in current run; 0 = no run */

    for ( ; *s != '\0'; ++s)
    {
	if (isISBNdigit(*s))
	{
	    if (ndigits == 0)
		first = s;	/* a new run begins here */
	    ndigits++;
	    if (ndigits == 10)	/* then we found an ISBN */
	    {
		*next = s + 1;
		return (first);
	    }
	}
	else if ((ndigits > 0) && ((*s == ' ') || (*s == '-')))
	    /* NO-OP: separators are allowed inside a run */;
	else
	    ndigits = 0;	/* anything else abandons the run */
    }

    *next = (const char*)NULL;
    return ((const char*)NULL);	/* no ISBN recognized */
}
1322
#if defined(HAVE_STDC)
static const char *
next_ISBN_13(const char *s,const char **next)
#else /* K&R style */
static const char *
next_ISBN_13(s,next)
const char *s;
const char **next;
#endif
{
    /* Find the first run in s[] that contains 13 ISBN-13 digits (as  */
    /* judged by isISBN_13digit()), where the digits may be separated */
    /* by spaces or hyphens but by nothing else.                      */
    /*                                                                */
    /* On success, return a pointer to the first digit of the run and */
    /* set *next to the character following the thirteenth digit.     */
    /* When the end of s[] is reached first, set *next to NULL and    */
    /* return NULL.                                                   */

    const char *first = (const char*)NULL; /* first digit of current run */
    size_t ndigits = 0;		/* digits seen in current run; 0 = no run */

    for ( ; *s != '\0'; ++s)
    {
	if (isISBN_13digit(*s))
	{
	    if (ndigits == 0)
		first = s;	/* a new run begins here */
	    ndigits++;
	    if (ndigits == 13)	/* then we found an ISBN_13 */
	    {
		*next = s + 1;
		return (first);
	    }
	}
	else if ((ndigits > 0) && ((*s == ' ') || (*s == '-')))
	    /* NO-OP: separators are allowed inside a run */;
	else
	    ndigits = 0;	/* anything else abandons the run */
    }

    *next = (const char*)NULL;
    return ((const char*)NULL);	/* no ISBN_13 recognized */
}
1361
1362 #if defined(HAVE_STDC)
1363 static void
squeeze_ISBN(char * out_ISBN,const char * in_ISBN)1364 squeeze_ISBN(char * out_ISBN, const char *in_ISBN)
1365 #else /* K&R style */
1366 static void
1367 squeeze_ISBN(out_ISBN,in_ISBN)
1368 char * out_ISBN;
1369 const char *in_ISBN;
1370 #endif
1371 { /* Copy in_ISBN to out_ISBN, eliminating non-ISBN characters */
1372 char *limit = out_ISBN + MAX_ISBN;
1373
1374 for ( ; out_ISBN < limit ; )
1375 {
1376 skip_non_ISBN_digit(in_ISBN);
1377 *out_ISBN = *in_ISBN;
1378 if (*in_ISBN == '\0')
1379 break;
1380 in_ISBN++;
1381 out_ISBN++;
1382 }
1383 }
1384