1 /*
2 * cite - adds hyperlinks to bibliographic references in HTML
3 *
4 * The program looks for strings of the form [[name]] (i.e., a
5 * bibliographic label inside a double pair of square brackets), e.g.,
6 * [[Knuth84]] or [[LieBos97]]. The label will be looked up in a
7 * bibliography database and if it is found, the string will be
8 * replaced by a pattern which is typically of the form <a
9 * href="...">[name]</a>, but the pattern can be changed
10 * with a command line option.
11 *
12 * If the string is of the form {{name}}, the name will be looked up,
13 * but the string will be copied unchanged.
14 *
15 * If the label is not found, a warning is printed and the string is
16 * left unchanged.
17 *
18 * All labels that are found are also stored, one label per line, in a
19 * separate file with extension .aux. This file can be used by mkbib
20 * to create the bibliography by extracting the corresponding
21 * bibliographic entries from the database.
22 *
23 * The bibliography database must be a refer-style database. Though
24 * for the purposes of this program all lines that don't start with
25 * "%L" or %K are ignored. Lines with "%L" are assumed to contain a
26 * label. Lines with %K are assumed to contain whitespace separated
27 * keywords, which are effectively aliases for the label. Entries must
28 * have one %L line and one or zero %K lines.
29 *
30 * Options:
31 *
32 * -b base
33 * Give the value for %b in the pattern.
34 *
35 * -p pattern
36 * The replacement for the string [[label]]. The default is
37 *
38 * <a href=\"%b#%L\" rel=\"biblioentry\">[%L]<!--{{%m%L}}--></a>
39 *
40 * %L will be replaced by the label, %b by the value of the -b
41 * option and %m by the marker (-m option).
42 *
43 * -a auxfile
44 * The name of the file in which the list of labels will be stored.
45 * Default is the name of the file given as argument, minus its
46 * extension, plus ".aux". If no file is given (input comes from
47 * stdin), the default name is "aux.aux".
48 *
49 * -m marker
50 * By default, the program looks for "[[name]]", but it can be
51 * made to look for "[[Xname]]" where X is some string, usually a
52 * symbol such as '!' or '='. This allows references to be
53 * classified, e.g., "[[!name]]" for normative references and
54 * "[[name]]" for non-normative references.
55 *
56 * -c
57 * Assume that every pair "<!--" and "-->" delimit a comment and
58 * do not process any [[label]] that occurs between them. Any
59 * "{{label}}" is processed as normal. This does not actually
60 * parse the input as HTML or XML and thus the program will
61 * mistake occurrences of these two strings inside CDATA sections
62 * or attribute values for comment delimiters.
63 *
64 * Copyright © 1994-2012 World Wide Web Consortium
65 * See http://www.w3.org/Consortium/Legal/copyright-software
66 *
67 * Author: Bert Bos <bert@w3.org>
68 * Created: 18 March 2000
69 * Version: $Id: hxcite.c,v 1.11 2018/02/15 19:02:36 bbos Exp $
70 **/
71
72 #include "config.h"
73 #ifdef HAVE_UNISTD_H
74 # include <unistd.h>
75 #endif
76 #include <assert.h>
77 #include <stdlib.h>
78 #include <errno.h>
79 #include <stdio.h>
80 #if STDC_HEADERS
81 # include <string.h>
82 #else
83 # ifndef HAVE_STRCHR
84 # define strchr index
85 # define strrchr rindex
86 # endif
87 # ifndef HAVE_STRSTR
88 # include "strstr.e"
89 # endif
90 #endif
91
92 #ifdef HAVE_SEARCH_H
93 # include <search.h>
94 #else
95 # include "hash.e"
96 #endif
97
98 #include <ctype.h>
99 #include <stdbool.h>
100 #include "export.h"
101 #include "heap.e"
102 #include "types.e"
103 #include "errexit.e"
104
105
106 /* Warning: arbitrary limits! */
107 #define LINESIZE 32768 /* Size of the buffers that fgets() reads lines into */
108 #define HASHSIZE 4096 /* Size of hash table */
109
110 #define WS " \t\r\n\f" /* Separates %K keywords; also trimmed from %L labels */
111
112 static string base = ""; /* URL of bibliography (-b option, %b in pattern) */
113 static string mark = ""; /* Flag after "[[" (-m option, %m in pattern) */
114 static size_t marklen = 0; /* Length of mark */
115 static string prog; /* = argv[0] */
116 static string pattern =
117 "<a href=\"%b#%L\" rel=\"biblioentry\">[%L]<!--{{%m%L}}--></a>"; /* Replacement for [[label]] (-p option) */
118 static FILE *aux; /* File to which every label that was found is written */
119 static bool skip_comments = false; /* Whether to skip [[ inside <!----> */
120
121
122 /* get_label -- get the label for the keyword, or NULL */
get_label(const string keyword)123 static string get_label(const string keyword)
124 {
125 ENTRY *result, e = {keyword, NULL};
126
127 result = hsearch(e, FIND);
128 return result ? (string) result->data : NULL;
129 }
130
131
132 /* valid_label -- check if the label is well-formed */
valid_label(const string label)133 static bool valid_label(const string label)
134 {
135 int i;
136
137 for (i = 0; label[i]; i++)
138 if (! isalnum(label[i])
139 && label[i] != '-'
140 && label[i] != '_'
141 && label[i] != '.') return false;
142 return true;
143 }
144
145
146 /* expand_ref -- print the reformatted reference */
expand_ref(const string label)147 static void expand_ref(const string label)
148 {
149 int i;
150
151 /* ToDo: somehow allow sequence numbers for references [1], [2], etc. */
152 for (i = 0; pattern[i]; i++) {
153 if (pattern[i] != '%') {
154 putchar(pattern[i]);
155 } else {
156 switch (pattern[++i]) {
157 case '%': putchar('%'); break; /* Literal '%' */
158 case 'b': printf("%s", base); break; /* Base URL */
159 case 'L': printf("%s", label); break; /* Label */
160 case 'm': printf("%s", mark); break; /* Mark (-m option) */
161 default: break; /* Error in pattern */
162 }
163 }
164 }
165 }
166
167
168 /* process_line -- look for citations in a line */
process_line(const string text,const string fname,int lineno,bool * in_comment)169 EXPORT void process_line(const string text, const string fname, int lineno,
170 bool *in_comment)
171 {
172 string h = text, p, q, label = NULL, key;
173 char c;
174
175 /* Loop over occurrences of "[[" + mark + label + "]]"
176 and "{{" + mark + label + "}}" */
177
178 while (*in_comment ? (p = strpbrk(h, "-{")) : (p = strpbrk(h, "[{<"))) {
179
180 while (h != p) putchar(*(h++)); /* Print text up to here */
181
182 if (strncmp(p, "-->", 3) == 0) { /* End of comment */
183 putchar(*(h++));
184 *in_comment = false;
185 continue;
186 }
187 if (strncmp(p, "<!--", 4) == 0) { /* Begin of comment */
188 putchar(*(h++));
189 *in_comment = skip_comments;
190 continue;
191 }
192 if (strncmp(p, "{{", 2) && strncmp(p, "[[", 2)) { /* Not {{ or [[ */
193 putchar(*(h++));
194 continue;
195 }
196
197 /* Is there a corresponding closing bracket? */
198 if (! (q = strstr(p + 2, *p == '[' ? "]]" : "}}"))) break;
199
200 c = *p; /* Remember [ or { */
201
202 if (marklen == 0 || strncmp(p + 2, mark, marklen) == 0) {
203
204 p += 2 + marklen; /* Skip "[["/"{{" + mark */
205 key = newnstring(p, q - p); /* Extract the key */
206
207 if (! valid_label(key)) { /* Cannot be a key */
208 while (h != q) putchar(*(h++)); /* Copy unchanged */
209 putchar(*q); putchar(*(q+1));
210 } else if (!(label = get_label(key))) { /* No citation found: warn */
211 while (h != q) putchar(*(h++)); /* Copy unchanged */
212 putchar(*q); putchar(*(q+1));
213 fprintf(stderr, "%s:%d: warning: no bib entry found for %s\n",
214 fname ? fname : (string)"<stdin>", lineno, key);
215 } else if (c == '[') { /* Key found: expand */
216 expand_ref(label); /* Insert full reference */
217 fprintf(aux, "%s\n", label); /* Store label */
218 } else { /* "{{" so don't expand */
219 while (h != q) putchar(*(h++)); /* Copy unchanged */
220 putchar(*q); putchar(*(q+1));
221 fprintf(aux, "%s\n", label); /* Store label */
222 }
223 dispose(key);
224
225 } else { /* No valid mark */
226
227 while (h != q) putchar(*(h++)); /* Copy unchanged */
228 putchar(*q); putchar(*(q+1));
229 }
230 h = q + 2;
231 }
232
233 printf("%s", h); /* Print rest of text */
234 }
235
236
237 /* store_labels_and_keywords -- store label in hash table */
store_labels_and_keywords(const string label,const string keys)238 static void store_labels_and_keywords(const string label, const string keys)
239 {
240 string label1, h, b;
241 ENTRY entry;
242
243 assert(label);
244 label1 = strtok_r(label, WS, &b); /* Remove white space */
245 if (!label1) return; /* Empty label */
246 entry.key = newstring(label1);
247 entry.data = newstring(label1);
248 if (!hsearch(entry, ENTER)) errexit("%s: %s\n", prog, strerror(errno));
249 if (keys) {
250 for (h = strtok_r(keys, WS, &b); h; h = strtok_r(NULL, WS, &b)) {
251 entry.key = newstring(h);
252 entry.data = newstring(label1);
253 if (!hsearch(entry, ENTER)) errexit("%s: %s\n", prog, strerror(errno));
254 }
255 }
256 }
257
258
259 /* parse_db -- extract all labels from the refer-style database */
parse_db(const string db)260 static void parse_db(const string db)
261 {
262 char line[LINESIZE];
263 FILE *f;
264 int e;
265 string label = NULL, keywords = NULL;
266
267 if (!(f = fopen(db,"r"))) errexit("%s: %s: %s\n", prog, db, strerror(errno));
268
269 /* Initialize the hash table */
270 if (! hcreate(HASHSIZE)) errexit("%s: %s\n", prog, strerror(errno));
271
272 /* Search for %L lines */
273 clearerr(f);
274 while (fgets(line, sizeof(line), f)) {
275 if (line[0] != '%') { /* We're outside an entry */
276 if (label) store_labels_and_keywords(label, keywords);
277 dispose(label);
278 dispose(keywords);
279 } else if (strncmp(line, "%L ", 3) == 0) {
280 label = newstring(line + 3);
281 } else if (strncmp(line, "%K ", 3) == 0) {
282 keywords = newstring(line + 3);
283 }
284 }
285 if (label) store_labels_and_keywords(label, keywords);
286
287 if ((e = ferror(f))) errexit("%s: %s: %s\n", prog, db, strerror(e));
288
289 if (fclose(f) != 0) errexit("%s: %s: %s\n", prog, db, strerror(errno));
290 }
291
292
293 /* usage -- print usage message and exit */
usage(void)294 static void usage(void)
295 {
296 errexit("Usage: %s [-b base] [-p pattern] [-a auxfile] [-c] [-v] bib-file [HTML-file]\n",
297 prog);
298 }
299
300
main(int argc,char * argv[])301 int main(int argc, char *argv[])
302 {
303 char line[LINESIZE];
304 string h, auxfile = NULL, dbfile = NULL, infile = NULL;
305 bool in_comment = false;
306 int e, lineno, c;
307 FILE *f;
308
309 /* Parse command line arguments */
310 prog = argv[0];
311 while ((c = getopt(argc, argv, "b:p:a:m:cv")) != -1) {
312 switch (c) {
313 case 'b': base = optarg; break; /* Set base of URL */
314 case 'p': pattern = optarg; break; /* Form of expanded ref */
315 case 'a': auxfile = optarg; break; /* Name of auxfile */
316 case 'm': mark = optarg; marklen = strlen(mark); break; /* After "[[" */
317 case 'c': skip_comments = true; break; /* Skip [[ in comments */
318 case 'v': printf("Version: %s %s\n", PACKAGE, VERSION); return 0;
319 default: usage();
320 }
321 }
322 if (optind == argc || argc > optind + 2) usage();
323
324 dbfile = argv[optind++];
325 if (optind != argc) infile = argv[optind++];
326
327 /* Read the labels from the bibliography database */
328 parse_db(dbfile);
329
330 /* Construct auxfile */
331 if (! auxfile) {
332 if (infile) {
333 newarray(auxfile, strlen(infile) + 5);
334 strcpy(auxfile, infile);
335 if ((h = strrchr(auxfile, '.'))) *h = '\0';
336 strcat(auxfile, ".aux");
337 } else {
338 auxfile = "aux.aux";
339 }
340 }
341 if (! (aux = fopen(auxfile, "w")))
342 errexit("%s: %s: %s\n", prog, auxfile, strerror(errno));
343
344 /* Open input file or use stdin */
345 f = infile ? fopen(infile, "r") : stdin;
346 if (!f) errexit("%s: %s: %s\n", prog, infile, strerror(errno));
347
348 /* Read input line by line */
349 clearerr(f);
350 lineno = 1;
351 while (fgets(line, sizeof(line), f))
352 process_line(line, infile, lineno++, &in_comment);
353 if ((e = ferror(f))) errexit("%s: %s\n", prog, strerror(e));
354
355 fclose(aux);
356 fclose(f);
357 return 0;
358 }
359