1 /*
2 * misc.c: miscellaneous useful items
3 */
4
5 #include <stdarg.h>
6 #include "halibut.h"
7
adv(char * s)8 char *adv(char *s) {
9 return s + 1 + strlen(s);
10 }
11
12 struct stackTag {
13 void **data;
14 int sp;
15 int size;
16 };
17
stk_new(void)18 stack stk_new(void) {
19 stack s;
20
21 s = snew(struct stackTag);
22 s->sp = 0;
23 s->size = 0;
24 s->data = NULL;
25
26 return s;
27 }
28
stk_free(stack s)29 void stk_free(stack s) {
30 sfree(s->data);
31 sfree(s);
32 }
33
stk_push(stack s,void * item)34 void stk_push(stack s, void *item) {
35 if (s->size <= s->sp) {
36 s->size = s->sp + 32;
37 s->data = sresize(s->data, s->size, void *);
38 }
39 s->data[s->sp++] = item;
40 }
41
stk_pop(stack s)42 void *stk_pop(stack s) {
43 if (s->sp > 0)
44 return s->data[--s->sp];
45 else
46 return NULL;
47 }
48
stk_top(stack s)49 void *stk_top(stack s) {
50 if (s->sp > 0)
51 return s->data[s->sp-1];
52 else
53 return NULL;
54 }
55
56 /*
57 * Small routines to amalgamate a string from an input source.
58 */
59 const rdstring empty_rdstring = {0, 0, NULL};
60 const rdstringc empty_rdstringc = {0, 0, NULL};
61
rdadd(rdstring * rs,wchar_t c)62 void rdadd(rdstring *rs, wchar_t c) {
63 if (rs->pos >= rs->size-1) {
64 rs->size = rs->pos + 128;
65 rs->text = sresize(rs->text, rs->size, wchar_t);
66 }
67 rs->text[rs->pos++] = c;
68 rs->text[rs->pos] = 0;
69 }
rdadds(rdstring * rs,wchar_t const * p)70 void rdadds(rdstring *rs, wchar_t const *p) {
71 int len = ustrlen(p);
72 if (rs->pos >= rs->size - len) {
73 rs->size = rs->pos + len + 128;
74 rs->text = sresize(rs->text, rs->size, wchar_t);
75 }
76 ustrcpy(rs->text + rs->pos, p);
77 rs->pos += len;
78 }
rdtrim(rdstring * rs)79 wchar_t *rdtrim(rdstring *rs) {
80 rs->text = sresize(rs->text, rs->pos + 1, wchar_t);
81 return rs->text;
82 }
83
rdaddc(rdstringc * rs,char c)84 void rdaddc(rdstringc *rs, char c) {
85 if (rs->pos >= rs->size-1) {
86 rs->size = rs->pos + 128;
87 rs->text = sresize(rs->text, rs->size, char);
88 }
89 rs->text[rs->pos++] = c;
90 rs->text[rs->pos] = 0;
91 }
rdaddsc(rdstringc * rs,char const * p)92 void rdaddsc(rdstringc *rs, char const *p) {
93 rdaddsn(rs, p, strlen(p));
94 }
rdaddsn(rdstringc * rs,char const * p,int len)95 void rdaddsn(rdstringc *rs, char const *p, int len) {
96 if (rs->pos >= rs->size - len) {
97 rs->size = rs->pos + len + 128;
98 rs->text = sresize(rs->text, rs->size, char);
99 }
100 memcpy(rs->text + rs->pos, p, len);
101 rs->pos += len;
102 rs->text[rs->pos] = 0;
103 }
rdtrimc(rdstringc * rs)104 char *rdtrimc(rdstringc *rs) {
105 rs->text = sresize(rs->text, rs->pos + 1, char);
106 return rs->text;
107 }
108
compare_wordlists_literally(word * a,word * b)109 static int compare_wordlists_literally(word *a, word *b) {
110 int t;
111 while (a && b) {
112 if (a->type != b->type)
113 return (a->type < b->type ? -1 : +1); /* FIXME? */
114 t = a->type;
115 if ((t != word_Normal && t != word_Code &&
116 t != word_WeakCode && t != word_Emph && t != word_Strong) ||
117 a->alt || b->alt) {
118 int c;
119 if (a->text && b->text) {
120 c = ustricmp(a->text, b->text);
121 if (c)
122 return c;
123 }
124 c = compare_wordlists_literally(a->alt, b->alt);
125 if (c)
126 return c;
127 a = a->next;
128 b = b->next;
129 } else {
130 wchar_t *ap = a->text, *bp = b->text;
131 while (*ap && *bp) {
132 wchar_t ac = *ap, bc = *bp;
133 if (ac != bc)
134 return (ac < bc ? -1 : +1);
135 if (!*++ap && a->next && a->next->type == t && !a->next->alt)
136 a = a->next, ap = a->text;
137 if (!*++bp && b->next && b->next->type == t && !b->next->alt)
138 b = b->next, bp = b->text;
139 }
140 if (*ap || *bp)
141 return (*ap ? +1 : -1);
142 a = a->next;
143 b = b->next;
144 }
145 }
146
147 if (a || b)
148 return (a ? +1 : -1);
149 else
150 return 0;
151 }
152
compare_wordlists(word * a,word * b)153 int compare_wordlists(word *a, word *b) {
154 /*
155 * First we compare only the alphabetic content of the word
156 * lists, with case not a factor. If that comes out equal,
157 * _then_ we compare the word lists literally.
158 */
159 struct {
160 word *w;
161 int i;
162 wchar_t c;
163 } pos[2];
164
165 pos[0].w = a;
166 pos[1].w = b;
167 pos[0].i = pos[1].i = 0;
168
169 while (1) {
170 /*
171 * Find the next alphabetic character in each word list.
172 */
173 int k;
174
175 for (k = 0; k < 2; k++) {
176 /*
177 * Advance until we hit either an alphabetic character
178 * or the end of the word list.
179 */
180 while (1) {
181 if (!pos[k].w) {
182 /* End of word list. */
183 pos[k].c = 0;
184 break;
185 } else if (!pos[k].w->text || !pos[k].w->text[pos[k].i]) {
186 /* No characters remaining in this word; move on. */
187 pos[k].w = pos[k].w->next;
188 pos[k].i = 0;
189 } else if (!uisalpha(pos[k].w->text[pos[k].i])) {
190 /* This character isn't alphabetic; move on. */
191 pos[k].i++;
192 } else {
193 /* We have an alphabetic! Lowercase it and continue. */
194 pos[k].c = utolower(pos[k].w->text[pos[k].i]);
195 break;
196 }
197 }
198 }
199
200 #ifdef HAS_WCSCOLL
201 {
202 wchar_t a[2], b[2];
203 int ret;
204
205 a[0] = pos[0].c;
206 b[0] = pos[1].c;
207 a[1] = b[1] = L'\0';
208
209 ret = wcscoll(a, b);
210 if (ret)
211 return ret;
212 }
213 #else
214 if (pos[0].c < pos[1].c)
215 return -1;
216 else if (pos[0].c > pos[1].c)
217 return +1;
218 #endif
219
220 if (!pos[0].c)
221 break; /* they're equal */
222
223 pos[0].i++;
224 pos[1].i++;
225 }
226
227 /*
228 * If we reach here, the strings were alphabetically equal, so
229 * compare in more detail.
230 */
231 return compare_wordlists_literally(a, b);
232 }
233
mark_attr_ends(word * words)234 void mark_attr_ends(word *words)
235 {
236 word *w, *wp;
237
238 wp = NULL;
239 for (w = words; w; w = w->next) {
240 int both;
241 if (!isvis(w->type))
242 /* Invisible elements should not affect this calculation */
243 continue;
244 both = (isattr(w->type) &&
245 wp && isattr(wp->type) &&
246 sameattr(wp->type, w->type));
247 w->aux |= both ? attr_Always : attr_First;
248 if (wp && !both) {
249 /* If previous considered word turns out to have been
250 * the end of a run, tidy it up. */
251 int wp_attr = attraux(wp->aux);
252 wp->aux = (wp->aux & ~attr_mask) |
253 ((wp_attr == attr_Always) ? attr_Last
254 /* attr_First */ : attr_Only);
255 }
256 wp = w;
257 }
258
259 /* Tidy up last word touched */
260 if (wp) {
261 int wp_attr = attraux(wp->aux);
262 wp->aux = (wp->aux & ~attr_mask) |
263 ((wp_attr == attr_Always) ? attr_Last
264 /* attr_First */ : attr_Only);
265 }
266 }
267
268 /*
269 * This function implements the optimal paragraph wrapping
270 * algorithm, pretty much as used in TeX. A cost function is
271 * defined for each line of the wrapped paragraph (typically some
272 * convex function of the difference between the line's length and
273 * its desired length), and a dynamic programming approach is used
274 * to optimise globally across all possible layouts of the
275 * paragraph to find the one with the minimum total cost.
276 *
277 * The function as implemented here gives a choice of two options
278 * for the cost function:
279 *
280 * - If `natural_space' is zero, then the algorithm attempts to
281 * make each line the maximum possible width (either `width' or
282 * `subsequentwidth' depending on whether it's the first line of
283 * the paragraph or not), and the cost function is simply the
284 * square of the unused space at the end of each line. This is a
285 * simple mechanism suitable for use in fixed-pitch environments
286 * such as plain text displayed on a terminal.
287 *
288 * - However, if `natural_space' is positive, the algorithm
289 * assumes the medium is fully graphical and that the width of
290 * space characters can be adjusted finely, and it attempts to
291 * make each _space character_ the width given in
292 * `natural_space'. (The provided width function should return
293 * the _minimum_ acceptable width of a space character in this
294 * case.) Therefore, the cost function for a line is dependent
295 * on the number of spaces on that line as well as the amount by
296 * which the line width differs from the optimum.
297 */
wrap_para(word * text,int width,int subsequentwidth,int (* widthfn)(void *,word *),void * ctx,int natural_space)298 wrappedline *wrap_para(word *text, int width, int subsequentwidth,
299 int (*widthfn)(void *, word *), void *ctx,
300 int natural_space) {
301 wrappedline *head = NULL, **ptr = &head;
302 int nwords, wordsize;
303 struct wrapword {
304 word *begin, *end;
305 int width;
306 int spacewidth;
307 int cost;
308 int nwords;
309 } *wrapwords;
310 int i, j, n;
311
312 /*
313 * Break the line up into wrappable components.
314 */
315 nwords = wordsize = 0;
316 wrapwords = NULL;
317 while (text) {
318 if (nwords >= wordsize) {
319 wordsize = nwords + 64;
320 wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
321 }
322 wrapwords[nwords].width = 0;
323 wrapwords[nwords].begin = text;
324 while (text) {
325 wrapwords[nwords].width += widthfn(ctx, text);
326 wrapwords[nwords].end = text->next;
327 if (text->next && (text->next->type == word_WhiteSpace ||
328 text->next->type == word_EmphSpace ||
329 text->next->type == word_StrongSpace ||
330 text->breaks))
331 break;
332 text = text->next;
333 }
334 if (text && text->next && (text->next->type == word_WhiteSpace ||
335 text->next->type == word_EmphSpace ||
336 text->next->type == word_StrongSpace)) {
337 wrapwords[nwords].spacewidth = widthfn(ctx, text->next);
338 text = text->next;
339 } else {
340 wrapwords[nwords].spacewidth = 0;
341 }
342 nwords++;
343 if (text)
344 text = text->next;
345 }
346
347 /*
348 * Perform the dynamic wrapping algorithm: work backwards from
349 * nwords-1, determining the optimal wrapping for each terminal
350 * subsequence of the paragraph.
351 */
352 for (i = nwords; i-- ;) {
353 int best = -1;
354 int bestcost = 0;
355 int cost;
356 int linelen = 0, spacewidth = 0, minspacewidth = 0;
357 int nspaces;
358 int thiswidth = (i == 0 ? width : subsequentwidth);
359
360 j = 0;
361 nspaces = 0;
362 while (i+j < nwords) {
363 /*
364 * See what happens if we put j+1 words on this line.
365 */
366 if (spacewidth) {
367 nspaces++;
368 minspacewidth = spacewidth;
369 }
370 linelen += spacewidth + wrapwords[i+j].width;
371 spacewidth = wrapwords[i+j].spacewidth;
372 j++;
373 if (linelen > thiswidth) {
374 /*
375 * If we're over the width limit, abandon ship,
376 * _unless_ there is no best-effort yet (which will
377 * only happen if the first word is too long all by
378 * itself).
379 */
380 if (best > 0)
381 break;
382 }
383
384 /*
385 * Compute the cost of this line. The method of doing
386 * this differs hugely depending on whether
387 * natural_space is nonzero or not.
388 */
389 if (natural_space) {
390 if (!nspaces && linelen > thiswidth) {
391 /*
392 * Special case: if there are no spaces at all
393 * on the line because one single word is too
394 * long for its line, cost is zero because
395 * there's nothing we can do about it anyway.
396 */
397 cost = 0;
398 } else {
399 int shortfall = thiswidth - linelen;
400 int spaceextra = shortfall / (nspaces ? nspaces : 1);
401 int spaceshortfall = natural_space -
402 (minspacewidth + spaceextra);
403
404 if (i+j == nwords && spaceshortfall < 0) {
405 /*
406 * Special case: on the very last line of
407 * the paragraph, we don't score penalty
408 * points for having to _stretch_ the line,
409 * since we won't stretch it anyway.
410 * However, we score penalties as normal
411 * for having to squeeze it.
412 */
413 cost = 0;
414 } else {
415 /*
416 * Squaring this number is tricky since
417 * it's liable to be quite big. Let's
418 * divide it through by 256.
419 */
420 int x = spaceshortfall >> 8;
421 int xf = spaceshortfall & 0xFF;
422
423 /*
424 * Not counting strange variable-fixed-
425 * point oddities, we are computing
426 *
427 * (x+xf)^2 = x^2 + 2*x*xf + xf*xf
428 *
429 * except that _our_ xf is 256 times the
430 * one listed there.
431 */
432
433 cost = x * x;
434 cost += (2 * x * xf) >> 8;
435 }
436 }
437 } else {
438 if (i+j == nwords) {
439 /*
440 * Special case: if we're at the very end of the
441 * paragraph, we don't score penalty points for the
442 * white space left on the line.
443 */
444 cost = 0;
445 } else {
446 cost = (thiswidth-linelen) * (thiswidth-linelen);
447 }
448 }
449
450 /*
451 * Add in the cost of wrapping all lines after this
452 * point too.
453 */
454 if (i+j < nwords)
455 cost += wrapwords[i+j].cost;
456
457 /*
458 * We compare bestcost >= cost, not bestcost > cost,
459 * because in cases where the costs are identical we
460 * want to try to look like the greedy algorithm,
461 * because readers are likely to have spent a lot of
462 * time looking at greedy-wrapped paragraphs and
463 * there's no point violating the Principle of Least
464 * Surprise if it doesn't actually gain anything.
465 */
466 if (best < 0 || bestcost >= cost) {
467 bestcost = cost;
468 best = j;
469 }
470 }
471 /*
472 * Now we know the optimal answer for this terminal
473 * subsequence, so put it in wrapwords.
474 */
475 wrapwords[i].cost = bestcost;
476 wrapwords[i].nwords = best;
477 }
478
479 /*
480 * We've wrapped the paragraph. Now build the output
481 * `wrappedline' list.
482 */
483 i = 0;
484 while (i < nwords) {
485 wrappedline *w = snew(wrappedline);
486 *ptr = w;
487 ptr = &w->next;
488 w->next = NULL;
489
490 n = wrapwords[i].nwords;
491 w->begin = wrapwords[i].begin;
492 w->end = wrapwords[i+n-1].end;
493
494 /*
495 * Count along the words to find nspaces and shortfall.
496 */
497 w->nspaces = 0;
498 w->shortfall = width;
499 for (j = 0; j < n; j++) {
500 w->shortfall -= wrapwords[i+j].width;
501 if (j < n-1 && wrapwords[i+j].spacewidth) {
502 w->nspaces++;
503 w->shortfall -= wrapwords[i+j].spacewidth;
504 }
505 }
506 i += n;
507 }
508
509 sfree(wrapwords);
510
511 return head;
512 }
513
wrap_free(wrappedline * w)514 void wrap_free(wrappedline *w) {
515 while (w) {
516 wrappedline *t = w->next;
517 sfree(w);
518 w = t;
519 }
520 }
521
cmdline_cfg_add(paragraph * cfg,char * string)522 void cmdline_cfg_add(paragraph *cfg, char *string)
523 {
524 wchar_t *ustring;
525 int upos, ulen, pos, len;
526
527 ulen = 0;
528 while (cfg->keyword[ulen])
529 ulen += 1 + ustrlen(cfg->keyword+ulen);
530 len = 0;
531 while (cfg->origkeyword[len])
532 len += 1 + strlen(cfg->origkeyword+len);
533
534 ustring = ufroma_locale_dup(string);
535
536 upos = ulen;
537 ulen += 2 + ustrlen(ustring);
538 cfg->keyword = sresize(cfg->keyword, ulen, wchar_t);
539 ustrcpy(cfg->keyword+upos, ustring);
540 cfg->keyword[ulen-1] = L'\0';
541
542 pos = len;
543 len += 2 + strlen(string);
544 cfg->origkeyword = sresize(cfg->origkeyword, len, char);
545 strcpy(cfg->origkeyword+pos, string);
546 cfg->origkeyword[len-1] = '\0';
547
548 sfree(ustring);
549 }
550
cmdline_cfg_new(void)551 paragraph *cmdline_cfg_new(void)
552 {
553 paragraph *p;
554
555 p = snew(paragraph);
556 memset(p, 0, sizeof(*p));
557 p->type = para_Config;
558 p->next = NULL;
559 p->fpos.filename = "<command line>";
560 p->fpos.line = p->fpos.col = -1;
561 p->keyword = ustrdup(L"\0");
562 p->origkeyword = dupstr("\0");
563
564 return p;
565 }
566
cmdline_cfg_simple(char * string,...)567 paragraph *cmdline_cfg_simple(char *string, ...)
568 {
569 va_list ap;
570 char *s;
571 paragraph *p;
572
573 p = cmdline_cfg_new();
574 cmdline_cfg_add(p, string);
575
576 va_start(ap, string);
577 while ((s = va_arg(ap, char *)) != NULL)
578 cmdline_cfg_add(p, s);
579 va_end(ap);
580
581 return p;
582 }
583