1 /*
2  * misc.c: miscellaneous useful items
3  */
4 
5 #include <stdarg.h>
6 #include "halibut.h"
7 
adv(char * s)8 char *adv(char *s) {
9     return s + 1 + strlen(s);
10 }
11 
12 struct stackTag {
13     void **data;
14     int sp;
15     int size;
16 };
17 
stk_new(void)18 stack stk_new(void) {
19     stack s;
20 
21     s = snew(struct stackTag);
22     s->sp = 0;
23     s->size = 0;
24     s->data = NULL;
25 
26     return s;
27 }
28 
stk_free(stack s)29 void stk_free(stack s) {
30     sfree(s->data);
31     sfree(s);
32 }
33 
stk_push(stack s,void * item)34 void stk_push(stack s, void *item) {
35     if (s->size <= s->sp) {
36 	s->size = s->sp + 32;
37 	s->data = sresize(s->data, s->size, void *);
38     }
39     s->data[s->sp++] = item;
40 }
41 
stk_pop(stack s)42 void *stk_pop(stack s) {
43     if (s->sp > 0)
44 	return s->data[--s->sp];
45     else
46 	return NULL;
47 }
48 
stk_top(stack s)49 void *stk_top(stack s) {
50     if (s->sp > 0)
51 	return s->data[s->sp-1];
52     else
53 	return NULL;
54 }
55 
56 /*
57  * Small routines to amalgamate a string from an input source.
58  */
59 const rdstring empty_rdstring = {0, 0, NULL};
60 const rdstringc empty_rdstringc = {0, 0, NULL};
61 
rdadd(rdstring * rs,wchar_t c)62 void rdadd(rdstring *rs, wchar_t c) {
63     if (rs->pos >= rs->size-1) {
64 	rs->size = rs->pos + 128;
65 	rs->text = sresize(rs->text, rs->size, wchar_t);
66     }
67     rs->text[rs->pos++] = c;
68     rs->text[rs->pos] = 0;
69 }
rdadds(rdstring * rs,wchar_t const * p)70 void rdadds(rdstring *rs, wchar_t const *p) {
71     int len = ustrlen(p);
72     if (rs->pos >= rs->size - len) {
73 	rs->size = rs->pos + len + 128;
74 	rs->text = sresize(rs->text, rs->size, wchar_t);
75     }
76     ustrcpy(rs->text + rs->pos, p);
77     rs->pos += len;
78 }
rdtrim(rdstring * rs)79 wchar_t *rdtrim(rdstring *rs) {
80     rs->text = sresize(rs->text, rs->pos + 1, wchar_t);
81     return rs->text;
82 }
83 
rdaddc(rdstringc * rs,char c)84 void rdaddc(rdstringc *rs, char c) {
85     if (rs->pos >= rs->size-1) {
86 	rs->size = rs->pos + 128;
87 	rs->text = sresize(rs->text, rs->size, char);
88     }
89     rs->text[rs->pos++] = c;
90     rs->text[rs->pos] = 0;
91 }
rdaddsc(rdstringc * rs,char const * p)92 void rdaddsc(rdstringc *rs, char const *p) {
93     rdaddsn(rs, p, strlen(p));
94 }
rdaddsn(rdstringc * rs,char const * p,int len)95 void rdaddsn(rdstringc *rs, char const *p, int len) {
96     if (rs->pos >= rs->size - len) {
97 	rs->size = rs->pos + len + 128;
98 	rs->text = sresize(rs->text, rs->size, char);
99     }
100     memcpy(rs->text + rs->pos, p, len);
101     rs->pos += len;
102     rs->text[rs->pos] = 0;
103 }
rdtrimc(rdstringc * rs)104 char *rdtrimc(rdstringc *rs) {
105     rs->text = sresize(rs->text, rs->pos + 1, char);
106     return rs->text;
107 }
108 
compare_wordlists_literally(word * a,word * b)109 static int compare_wordlists_literally(word *a, word *b) {
110     int t;
111     while (a && b) {
112 	if (a->type != b->type)
113 	    return (a->type < b->type ? -1 : +1);   /* FIXME? */
114 	t = a->type;
115 	if ((t != word_Normal && t != word_Code &&
116 	     t != word_WeakCode && t != word_Emph && t != word_Strong) ||
117 	    a->alt || b->alt) {
118 	    int c;
119 	    if (a->text && b->text) {
120 		c = ustricmp(a->text, b->text);
121 		if (c)
122 		    return c;
123 	    }
124 	    c = compare_wordlists_literally(a->alt, b->alt);
125 	    if (c)
126 		return c;
127 	    a = a->next;
128 	    b = b->next;
129 	} else {
130 	    wchar_t *ap = a->text, *bp = b->text;
131 	    while (*ap && *bp) {
132 		wchar_t ac = *ap, bc = *bp;
133 		if (ac != bc)
134 		    return (ac < bc ? -1 : +1);
135 		if (!*++ap && a->next && a->next->type == t && !a->next->alt)
136 		    a = a->next, ap = a->text;
137 		if (!*++bp && b->next && b->next->type == t && !b->next->alt)
138 		    b = b->next, bp = b->text;
139 	    }
140 	    if (*ap || *bp)
141 		return (*ap ? +1 : -1);
142 	    a = a->next;
143 	    b = b->next;
144 	}
145     }
146 
147     if (a || b)
148 	return (a ? +1 : -1);
149     else
150 	return 0;
151 }
152 
compare_wordlists(word * a,word * b)153 int compare_wordlists(word *a, word *b) {
154     /*
155      * First we compare only the alphabetic content of the word
156      * lists, with case not a factor. If that comes out equal,
157      * _then_ we compare the word lists literally.
158      */
159     struct {
160 	word *w;
161 	int i;
162 	wchar_t c;
163     } pos[2];
164 
165     pos[0].w = a;
166     pos[1].w = b;
167     pos[0].i = pos[1].i = 0;
168 
169     while (1) {
170 	/*
171 	 * Find the next alphabetic character in each word list.
172 	 */
173 	int k;
174 
175 	for (k = 0; k < 2; k++) {
176 	    /*
177 	     * Advance until we hit either an alphabetic character
178 	     * or the end of the word list.
179 	     */
180 	    while (1) {
181 		if (!pos[k].w) {
182 		    /* End of word list. */
183 		    pos[k].c = 0;
184 		    break;
185 		} else if (!pos[k].w->text || !pos[k].w->text[pos[k].i]) {
186 		    /* No characters remaining in this word; move on. */
187 		    pos[k].w = pos[k].w->next;
188 		    pos[k].i = 0;
189 		} else if (!uisalpha(pos[k].w->text[pos[k].i])) {
190 		    /* This character isn't alphabetic; move on. */
191 		    pos[k].i++;
192 		} else {
193 		    /* We have an alphabetic! Lowercase it and continue. */
194 		    pos[k].c = utolower(pos[k].w->text[pos[k].i]);
195 		    break;
196 		}
197 	    }
198 	}
199 
200 #ifdef HAS_WCSCOLL
201 	{
202 	    wchar_t a[2], b[2];
203 	    int ret;
204 
205 	    a[0] = pos[0].c;
206 	    b[0] = pos[1].c;
207 	    a[1] = b[1] = L'\0';
208 
209 	    ret = wcscoll(a, b);
210 	    if (ret)
211 		return ret;
212 	}
213 #else
214 	if (pos[0].c < pos[1].c)
215 	    return -1;
216 	else if (pos[0].c > pos[1].c)
217 	    return +1;
218 #endif
219 
220 	if (!pos[0].c)
221 	    break;		       /* they're equal */
222 
223 	pos[0].i++;
224 	pos[1].i++;
225     }
226 
227     /*
228      * If we reach here, the strings were alphabetically equal, so
229      * compare in more detail.
230      */
231     return compare_wordlists_literally(a, b);
232 }
233 
mark_attr_ends(word * words)234 void mark_attr_ends(word *words)
235 {
236     word *w, *wp;
237 
238     wp = NULL;
239     for (w = words; w; w = w->next) {
240 	int both;
241 	if (!isvis(w->type))
242 	    /* Invisible elements should not affect this calculation */
243 	    continue;
244 	both = (isattr(w->type) &&
245 		wp && isattr(wp->type) &&
246 		sameattr(wp->type, w->type));
247 	w->aux |= both ? attr_Always : attr_First;
248 	if (wp && !both) {
249 	    /* If previous considered word turns out to have been
250 	     * the end of a run, tidy it up. */
251 	    int wp_attr = attraux(wp->aux);
252 	    wp->aux = (wp->aux & ~attr_mask) |
253 		((wp_attr == attr_Always) ? attr_Last
254 			 /* attr_First */ : attr_Only);
255 	}
256 	wp = w;
257     }
258 
259     /* Tidy up last word touched */
260     if (wp) {
261 	int wp_attr = attraux(wp->aux);
262 	wp->aux = (wp->aux & ~attr_mask) |
263 	    ((wp_attr == attr_Always) ? attr_Last
264 		     /* attr_First */ : attr_Only);
265     }
266 }
267 
268 /*
269  * This function implements the optimal paragraph wrapping
270  * algorithm, pretty much as used in TeX. A cost function is
271  * defined for each line of the wrapped paragraph (typically some
272  * convex function of the difference between the line's length and
273  * its desired length), and a dynamic programming approach is used
274  * to optimise globally across all possible layouts of the
275  * paragraph to find the one with the minimum total cost.
276  *
277  * The function as implemented here gives a choice of two options
278  * for the cost function:
279  *
280  *  - If `natural_space' is zero, then the algorithm attempts to
281  *    make each line the maximum possible width (either `width' or
282  *    `subsequentwidth' depending on whether it's the first line of
283  *    the paragraph or not), and the cost function is simply the
284  *    square of the unused space at the end of each line. This is a
285  *    simple mechanism suitable for use in fixed-pitch environments
286  *    such as plain text displayed on a terminal.
287  *
288  *  - However, if `natural_space' is positive, the algorithm
289  *    assumes the medium is fully graphical and that the width of
290  *    space characters can be adjusted finely, and it attempts to
291  *    make each _space character_ the width given in
292  *    `natural_space'. (The provided width function should return
293  *    the _minimum_ acceptable width of a space character in this
294  *    case.) Therefore, the cost function for a line is dependent
295  *    on the number of spaces on that line as well as the amount by
296  *    which the line width differs from the optimum.
297  */
wrap_para(word * text,int width,int subsequentwidth,int (* widthfn)(void *,word *),void * ctx,int natural_space)298 wrappedline *wrap_para(word *text, int width, int subsequentwidth,
299 		       int (*widthfn)(void *, word *), void *ctx,
300 		       int natural_space) {
301     wrappedline *head = NULL, **ptr = &head;
302     int nwords, wordsize;
303     struct wrapword {
304 	word *begin, *end;
305 	int width;
306 	int spacewidth;
307 	int cost;
308 	int nwords;
309     } *wrapwords;
310     int i, j, n;
311 
312     /*
313      * Break the line up into wrappable components.
314      */
315     nwords = wordsize = 0;
316     wrapwords = NULL;
317     while (text) {
318 	if (nwords >= wordsize) {
319 	    wordsize = nwords + 64;
320 	    wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
321 	}
322 	wrapwords[nwords].width = 0;
323 	wrapwords[nwords].begin = text;
324 	while (text) {
325 	    wrapwords[nwords].width += widthfn(ctx, text);
326 	    wrapwords[nwords].end = text->next;
327 	    if (text->next && (text->next->type == word_WhiteSpace ||
328 			       text->next->type == word_EmphSpace ||
329 			       text->next->type == word_StrongSpace ||
330 			       text->breaks))
331 		break;
332 	    text = text->next;
333 	}
334 	if (text && text->next && (text->next->type == word_WhiteSpace ||
335                                    text->next->type == word_EmphSpace ||
336                                    text->next->type == word_StrongSpace)) {
337 	    wrapwords[nwords].spacewidth = widthfn(ctx, text->next);
338 	    text = text->next;
339 	} else {
340 	    wrapwords[nwords].spacewidth = 0;
341 	}
342 	nwords++;
343 	if (text)
344 	    text = text->next;
345     }
346 
347     /*
348      * Perform the dynamic wrapping algorithm: work backwards from
349      * nwords-1, determining the optimal wrapping for each terminal
350      * subsequence of the paragraph.
351      */
352     for (i = nwords; i-- ;) {
353 	int best = -1;
354 	int bestcost = 0;
355 	int cost;
356 	int linelen = 0, spacewidth = 0, minspacewidth = 0;
357 	int nspaces;
358 	int thiswidth = (i == 0 ? width : subsequentwidth);
359 
360 	j = 0;
361 	nspaces = 0;
362 	while (i+j < nwords) {
363 	    /*
364 	     * See what happens if we put j+1 words on this line.
365 	     */
366 	    if (spacewidth) {
367 		nspaces++;
368 		minspacewidth = spacewidth;
369 	    }
370 	    linelen += spacewidth + wrapwords[i+j].width;
371 	    spacewidth = wrapwords[i+j].spacewidth;
372 	    j++;
373 	    if (linelen > thiswidth) {
374 		/*
375 		 * If we're over the width limit, abandon ship,
376 		 * _unless_ there is no best-effort yet (which will
377 		 * only happen if the first word is too long all by
378 		 * itself).
379 		 */
380 		if (best > 0)
381 		    break;
382 	    }
383 
384 	    /*
385 	     * Compute the cost of this line. The method of doing
386 	     * this differs hugely depending on whether
387 	     * natural_space is nonzero or not.
388 	     */
389 	    if (natural_space) {
390 		if (!nspaces && linelen > thiswidth) {
391 		    /*
392 		     * Special case: if there are no spaces at all
393 		     * on the line because one single word is too
394 		     * long for its line, cost is zero because
395 		     * there's nothing we can do about it anyway.
396 		     */
397 		    cost = 0;
398 		} else {
399 		    int shortfall = thiswidth - linelen;
400 		    int spaceextra = shortfall / (nspaces ? nspaces : 1);
401 		    int spaceshortfall = natural_space -
402 			(minspacewidth + spaceextra);
403 
404 		    if (i+j == nwords && spaceshortfall < 0) {
405 			/*
406 			 * Special case: on the very last line of
407 			 * the paragraph, we don't score penalty
408 			 * points for having to _stretch_ the line,
409 			 * since we won't stretch it anyway.
410 			 * However, we score penalties as normal
411 			 * for having to squeeze it.
412 			 */
413 			cost = 0;
414 		    } else {
415 			/*
416 			 * Squaring this number is tricky since
417 			 * it's liable to be quite big. Let's
418 			 * divide it through by 256.
419 			 */
420 			int x = spaceshortfall >> 8;
421 			int xf = spaceshortfall & 0xFF;
422 
423 			/*
424 			 * Not counting strange variable-fixed-
425 			 * point oddities, we are computing
426 			 *
427 			 *   (x+xf)^2 = x^2 + 2*x*xf + xf*xf
428 			 *
429 			 * except that _our_ xf is 256 times the
430 			 * one listed there.
431 			 */
432 
433 			cost = x * x;
434 			cost += (2 * x * xf) >> 8;
435 		    }
436 		}
437 	    } else {
438 		if (i+j == nwords) {
439 		    /*
440 		     * Special case: if we're at the very end of the
441 		     * paragraph, we don't score penalty points for the
442 		     * white space left on the line.
443 		     */
444 		    cost = 0;
445 		} else {
446 		    cost = (thiswidth-linelen) * (thiswidth-linelen);
447 		}
448 	    }
449 
450 	    /*
451 	     * Add in the cost of wrapping all lines after this
452 	     * point too.
453 	     */
454 	    if (i+j < nwords)
455 		cost += wrapwords[i+j].cost;
456 
457 	    /*
458 	     * We compare bestcost >= cost, not bestcost > cost,
459 	     * because in cases where the costs are identical we
460 	     * want to try to look like the greedy algorithm,
461 	     * because readers are likely to have spent a lot of
462 	     * time looking at greedy-wrapped paragraphs and
463 	     * there's no point violating the Principle of Least
464 	     * Surprise if it doesn't actually gain anything.
465 	     */
466 	    if (best < 0 || bestcost >= cost) {
467 		bestcost = cost;
468 		best = j;
469 	    }
470 	}
471 	/*
472 	 * Now we know the optimal answer for this terminal
473 	 * subsequence, so put it in wrapwords.
474 	 */
475 	wrapwords[i].cost = bestcost;
476 	wrapwords[i].nwords = best;
477     }
478 
479     /*
480      * We've wrapped the paragraph. Now build the output
481      * `wrappedline' list.
482      */
483     i = 0;
484     while (i < nwords) {
485 	wrappedline *w = snew(wrappedline);
486 	*ptr = w;
487 	ptr = &w->next;
488 	w->next = NULL;
489 
490 	n = wrapwords[i].nwords;
491 	w->begin = wrapwords[i].begin;
492 	w->end = wrapwords[i+n-1].end;
493 
494 	/*
495 	 * Count along the words to find nspaces and shortfall.
496 	 */
497 	w->nspaces = 0;
498 	w->shortfall = width;
499 	for (j = 0; j < n; j++) {
500 	    w->shortfall -= wrapwords[i+j].width;
501 	    if (j < n-1 && wrapwords[i+j].spacewidth) {
502 		w->nspaces++;
503 		w->shortfall -= wrapwords[i+j].spacewidth;
504 	    }
505 	}
506 	i += n;
507     }
508 
509     sfree(wrapwords);
510 
511     return head;
512 }
513 
wrap_free(wrappedline * w)514 void wrap_free(wrappedline *w) {
515     while (w) {
516 	wrappedline *t = w->next;
517 	sfree(w);
518 	w = t;
519     }
520 }
521 
cmdline_cfg_add(paragraph * cfg,char * string)522 void cmdline_cfg_add(paragraph *cfg, char *string)
523 {
524     wchar_t *ustring;
525     int upos, ulen, pos, len;
526 
527     ulen = 0;
528     while (cfg->keyword[ulen])
529 	ulen += 1 + ustrlen(cfg->keyword+ulen);
530     len = 0;
531     while (cfg->origkeyword[len])
532 	len += 1 + strlen(cfg->origkeyword+len);
533 
534     ustring = ufroma_locale_dup(string);
535 
536     upos = ulen;
537     ulen += 2 + ustrlen(ustring);
538     cfg->keyword = sresize(cfg->keyword, ulen, wchar_t);
539     ustrcpy(cfg->keyword+upos, ustring);
540     cfg->keyword[ulen-1] = L'\0';
541 
542     pos = len;
543     len += 2 + strlen(string);
544     cfg->origkeyword = sresize(cfg->origkeyword, len, char);
545     strcpy(cfg->origkeyword+pos, string);
546     cfg->origkeyword[len-1] = '\0';
547 
548     sfree(ustring);
549 }
550 
cmdline_cfg_new(void)551 paragraph *cmdline_cfg_new(void)
552 {
553     paragraph *p;
554 
555     p = snew(paragraph);
556     memset(p, 0, sizeof(*p));
557     p->type = para_Config;
558     p->next = NULL;
559     p->fpos.filename = "<command line>";
560     p->fpos.line = p->fpos.col = -1;
561     p->keyword = ustrdup(L"\0");
562     p->origkeyword = dupstr("\0");
563 
564     return p;
565 }
566 
cmdline_cfg_simple(char * string,...)567 paragraph *cmdline_cfg_simple(char *string, ...)
568 {
569     va_list ap;
570     char *s;
571     paragraph *p;
572 
573     p = cmdline_cfg_new();
574     cmdline_cfg_add(p, string);
575 
576     va_start(ap, string);
577     while ((s = va_arg(ap, char *)) != NULL)
578 	cmdline_cfg_add(p, s);
579     va_end(ap);
580 
581     return p;
582 }
583