1 /*
2  * index.c: create and collate index data structures
3  */
4 
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include "halibut.h"
8 
9 static int compare_tags(void *av, void *bv);
10 static int compare_entries(void *av, void *bv);
11 
make_index(void)12 indexdata *make_index(void) {
13     indexdata *ret = snew(indexdata);
14     ret->tags = newtree234(compare_tags);
15     ret->entries = newtree234(compare_entries);
16     return ret;
17 }
18 
make_indextag(void)19 static indextag *make_indextag(void) {
20     indextag *ret = snew(indextag);
21     ret->name = NULL;
22     ret->implicit_text = NULL;
23     ret->explicit_texts = NULL;
24     ret->explicit_fpos = NULL;
25     ret->nexplicit = ret->explicit_size = ret->nrefs = 0;
26     ret->refs = NULL;
27     return ret;
28 }
29 
compare_tags(void * av,void * bv)30 static int compare_tags(void *av, void *bv) {
31     indextag *a = (indextag *)av, *b = (indextag *)bv;
32     return ustricmp(a->name, b->name);
33 }
34 
compare_to_find_tag(void * av,void * bv)35 static int compare_to_find_tag(void *av, void *bv) {
36     wchar_t *a = (wchar_t *)av;
37     indextag *b = (indextag *)bv;
38     return ustricmp(a, b->name);
39 }
40 
compare_entries(void * av,void * bv)41 static int compare_entries(void *av, void *bv) {
42     indexentry *a = (indexentry *)av, *b = (indexentry *)bv;
43     return compare_wordlists(a->text, b->text);
44 }
45 
46 /*
47  * Back-end utility: find the indextag with a given name.
48  */
index_findtag(indexdata * idx,wchar_t * name)49 indextag *index_findtag(indexdata *idx, wchar_t *name) {
50     return find234(idx->tags, name, compare_to_find_tag);
51 }
52 
53 /*
54  * Add a \IM. `tags' points to a zero-terminated chain of
55  * zero-terminated strings ("first\0second\0thirdandlast\0\0").
56  * `text' points to a word list.
57  *
58  * Guarantee on calling sequence: all implicit merges are given
59  * before the explicit ones.
60  */
index_merge(indexdata * idx,int is_explicit,wchar_t * tags,word * text,filepos * fpos)61 void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text,
62 		 filepos *fpos) {
63     indextag *t, *existing;
64 
65     /*
66      * For an implicit merge, we want to remove all emphasis,
67      * because the chances are that the user didn't really want to
68      * index the term as emphasised.
69      */
70     {
71 	word *w;
72 
73 	for (w = text; w; w = w->next) {
74 	    if (w->type == word_Emph || w->type == word_Strong)
75 		w->type = word_Normal;
76 	    else if (w->type == word_EmphSpace || w->type == word_StrongSpace)
77 		w->type = word_WhiteSpace;
78 	    else if (w->type == word_EmphQuote || w->type == word_StrongQuote)
79 		w->type = word_Quote;
80 	}
81     }
82 
83     /*
84      * FIXME: want to warn on overlapping source sets.
85      */
86     for (; *tags; tags = uadv(tags)) {
87 	t = make_indextag();
88 	t->name = tags;
89 	existing = add234(idx->tags, t);
90 	if (existing == t) {
91 	    /*
92 	     * Duplicate this so we can free it independently.
93 	     */
94 	    t->name = ustrdup(tags);
95 
96 	    /*
97 	     * Every tag has an implicit \IM. So if this tag
98 	     * doesn't exist and we're explicit, then we should
99 	     * warn (and drop it, since it won't be referenced).
100 	     */
101 	    if (is_explicit) {
102 		err_nosuchidxtag(fpos, tags);
103 		continue;
104 	    }
105 
106 	    /*
107 	     * Otherwise, this is a new tag with an implicit \IM.
108 	     */
109 	    t->implicit_text = text;
110 	    t->implicit_fpos = *fpos;
111 	} else {
112 	    if (!is_explicit) {
113  		/*
114 		 * An implicit \IM for a tag that's had an implicit
115 		 * \IM before. FIXME: we should check the text
116 		 * against the existing text and warn on
117 		 * differences. And check the tag for case match
118 		 * against the existing tag, likewise.
119 		 */
120 
121 		/*
122 		 * Check the tag against its previous occurrence to
123 		 * see if the cases match.
124 		 */
125 		if (ustrcmp(t->name, existing->name)) {
126 		    err_indexcase(fpos, t->name,
127 			  &existing->implicit_fpos, existing->name);
128 		}
129 
130 		sfree(t);
131 	    } else {
132 		/*
133 		 * An explicit \IM added to a valid tag. In
134 		 * particular, this removes the implicit \IM if
135 		 * present.
136 		 */
137 		sfree(t);
138 		t = existing;
139 		if (t->implicit_text) {
140 		    free_word_list(t->implicit_text);
141 		    t->implicit_text = NULL;
142 		}
143 		if (t->nexplicit >= t->explicit_size) {
144 		    t->explicit_size = t->nexplicit + 8;
145 		    t->explicit_texts = sresize(t->explicit_texts,
146 						t->explicit_size, word *);
147 		    t->explicit_fpos = sresize(t->explicit_fpos,
148 					       t->explicit_size, filepos);
149 		}
150 		t->explicit_texts[t->nexplicit] = text;
151 		t->explicit_fpos[t->nexplicit] = *fpos;
152 		t->nexplicit++;
153 	    }
154 	}
155     }
156 }
157 
158 /*
159  * Build the final-form index. We now have every tag, with every
160  * \IM, set up in a 2-3 tree indexed by tag. We now want to collate
161  * the RHSes of the \IMs, and sort by final form, and decorate the
162  * entries in the original 2-3 tree with pointers to the RHS
163  * entries.
164  */
build_index(indexdata * i)165 void build_index(indexdata *i) {
166     indextag *t;
167     word **ta;
168     filepos *fa;
169     int ti;
170     int j;
171 
172     for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) {
173 	if (t->implicit_text) {
174 	    t->nrefs = 1;
175 	    ta = &t->implicit_text;
176 	    fa = &t->implicit_fpos;
177 	} else {
178 	    t->nrefs = t->nexplicit;
179 	    ta = t->explicit_texts;
180 	    fa = t->explicit_fpos;
181 	}
182 	if (t->nrefs) {
183 	    t->refs = snewn(t->nrefs, indexentry *);
184 	    for (j = 0; j < t->nrefs; j++) {
185 		indexentry *ent = snew(indexentry);
186 		ent->text = *ta++;
187 		ent->fpos = *fa++;
188 		t->refs[j] = add234(i->entries, ent);
189 		if (t->refs[j] != ent)     /* duplicate */
190 		    sfree(ent);
191 	    }
192 	}
193     }
194 }
195 
cleanup_index(indexdata * i)196 void cleanup_index(indexdata *i) {
197     indextag *t;
198     indexentry *ent;
199     int ti;
200 
201     for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) {
202 	sfree(t->name);
203 	free_word_list(t->implicit_text);
204 	sfree(t->explicit_texts);
205 	sfree(t->refs);
206 	sfree(t);
207     }
208     freetree234(i->tags);
209     for (ti = 0; (ent = (indexentry *)index234(i->entries, ti))!=NULL; ti++) {
210 	sfree(ent);
211     }
212     freetree234(i->entries);
213     sfree(i);
214 }
215 
216 static void dbg_prtwordlist(int level, word *w);
217 static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text);
218 
index_debug(indexdata * i)219 void index_debug(indexdata *i) {
220     indextag *t;
221     indexentry *y;
222     int ti;
223     int j;
224 
225     printf("\nINDEX TAGS\n==========\n\n");
226     for (ti = 0; (t = (indextag *)index234(i->tags, ti)) != NULL; ti++) {
227         printf("\n");
228 	if (t->implicit_text)
229 	    dbg_prtmerge(0, t->name, t->implicit_text);
230 	for (j = 0; j < t->nexplicit; j++)
231 	    dbg_prtmerge(1, t->name, t->explicit_texts[j]);
232     }
233 
234     printf("\nINDEX ENTRIES\n=============\n\n");
235     for (ti = 0; (y = (indexentry *)index234(i->entries, ti)) != NULL; ti++) {
236         printf("\n");
237 	printf("{\n");
238 	dbg_prtwordlist(1, y->text);
239 	printf("}\n");
240     }
241 }
242 
dbg_prtmerge(int is_explicit,wchar_t * tag,word * text)243 static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text) {
244     printf("\\IM: %splicit: \"", is_explicit ? "ex" : "im");
245     for (; *tag; tag++)
246 	putchar(*tag);
247     printf("\" {\n");
248     dbg_prtwordlist(1, text);
249     printf("}\n");
250 }
251 
dbg_prtwordlist(int level,word * w)252 static void dbg_prtwordlist(int level, word *w) {
253     for (; w; w = w->next) {
254 	wchar_t *wp;
255 	printf("%*sword %d ", level*4, "", w->type);
256 	if (w->text) {
257 	    printf("\"");
258 	    for (wp = w->text; *wp; wp++)
259 		    putchar(*wp);
260 	    printf("\"");
261 	} else
262 	    printf("(no text)");
263 	if (w->alt) {
264 	    printf(" alt = {\n");
265 	    dbg_prtwordlist(level+1, w->alt);
266 	    printf("%*s}", level*4, "");
267 	}
268 	printf("\n");
269     }
270 }
271