1 #ifndef HALIBUT_HALIBUT_H
2 #define HALIBUT_HALIBUT_H
3 
4 #include <stdio.h>
5 #include <wchar.h>
6 #include <time.h>
7 #include <string.h>
8 
9 #include "charset.h"
10 
/*
 * NORETURN is written after the parameter list of a function
 * declaration to mark the function as never returning. It expands
 * to the GNU C `noreturn' attribute when __GNUC__ is defined, and to
 * nothing at all otherwise.
 */
#if defined(__GNUC__)
#  define NORETURN __attribute__((__noreturn__))
#else
#  define NORETURN
#endif
16 
/* Boolean constants, supplied only if nothing has defined them yet. */
#if !defined(TRUE)
#  define TRUE 1
#endif
#if !defined(FALSE)
#  define FALSE 0
#endif
23 
/*
 * For suppressing unused-parameter warnings.
 *
 * The argument is evaluated once and its value discarded. Casting to
 * void (rather than assigning the parameter to itself) means this
 * also works on const-qualified parameters and never writes to the
 * object.
 */
#define IGNORE(x) ( (void)(x) )
26 
27 #include "tree234.h"
28 
/*
 * Structure tags
 *
 * Forward typedefs giving each structure a short name, so that the
 * declarations below can refer to one another through pointers
 * regardless of the order in which the structures are defined. A
 * tag that is given no definition in this header can only be used
 * here as an incomplete type, i.e. via a pointer.
 */
typedef struct input_Tag input;
typedef struct filepos_Tag filepos;
typedef struct paragraph_Tag paragraph;
typedef struct word_Tag word;
typedef struct keywordlist_Tag keywordlist;
typedef struct keyword_Tag keyword;
typedef struct numberstate_Tag numberstate;
typedef struct indexdata_Tag indexdata;
typedef struct indextag_Tag indextag;
typedef struct indexentry_Tag indexentry;
typedef struct macrostack_Tag macrostack;
43 
/*
 * Data structure to hold a file name, a line number and a column
 * number, for reporting errors
 */
struct filepos_Tag {
    char *filename;
    int line, col;
};
52 
/*
 * Data structures to hold all the file names etc for input.
 *
 * A `pushback' pairs one character value (chr) with a file position
 * (pos); struct input_Tag holds a pointer to these for its
 * pushed-back input characters.
 */
typedef struct pushback_Tag {
    int chr;
    filepos pos;
} pushback;
/*
 * State of the input reader: the list of input files, the file
 * currently open, pushed-back characters, macro expansions and
 * character-set conversion state.
 */
struct input_Tag {
    char **filenames;		       /* complete list of input files */
    int nfiles;			       /* how many in the list */
    FILE *currfp;		       /* the currently open one */
    int currindex;		       /* which one is that in the list */
    int wantclose;		       /* does the current file want closing */
    pushback *pushback;		       /* pushed-back input characters */
    /* presumably entries in use / entries allocated in pushback[] --
     * TODO confirm against input.c */
    int npushback, pushbacksize;
    /* NOTE(review): looks like the current read position -- confirm */
    filepos pos;
    int reportcols;		       /* report column numbers in errors */
    macrostack *stack;		       /* macro expansions in force */
    int defcharset, charset;	       /* character sets for input files */
    charset_state csstate;
    wchar_t wc[16];		       /* wide chars from input conversion */
    int nwc, wcpos;		       /* size of, and position in, wc[] */
    char *pushback_chars;	       /* used to save input-encoding data */
};
77 
/*
 * Data structure to hold the input form of the source, ie a linked
 * list of paragraphs
 */
struct paragraph_Tag {
    paragraph *next;		       /* next paragraph in the list */
    int type;			       /* presumably one of the para_*
                                        * values -- TODO confirm
                                        */
    wchar_t *keyword;		       /* for most special paragraphs */
    char *origkeyword;		       /* same again in original charset */
    word *words;		       /* list of words in paragraph */
    int aux;			       /* number, in a numbered paragraph
                                        * or subsection level
                                        */
    word *kwtext;		       /* chapter/section indication */
    word *kwtext2;		       /* numeric-only form of kwtext */
    filepos fpos;		       /* source position (a filepos, as
                                        * used for reporting errors)
                                        */

    paragraph *parent, *child, *sibling;   /* for hierarchy navigation */

    void *private_data; 	       /* for temp use in backends */
};
/*
 * Paragraph types. No enumerator has an explicit value, so they are
 * numbered consecutively starting from para_IM == 0, and
 * para_NotParaType is one more than the last real type.
 */
enum {
    para_IM,			       /* index merge */
    para_BR,			       /* bibliography rewrite */
    para_Rule,			       /* random horizontal rule */
    para_Chapter,
    para_Appendix,
    para_UnnumberedChapter,
    para_Heading,
    para_Subsect,
    para_Normal,
    para_Biblio,		       /* causes no output unless turned ... */
    para_BiblioCited,		       /*  ... into this paragraph type */
    para_Bullet,
    para_NumberedList,
    para_DescribedThing,
    para_Description,
    para_Code,
    para_Copyright,
    para_NoCite,
    para_Title,
    para_VersionID,
    para_Config,		       /* configuration directive */
    para_LcontPush,		       /* begin continuation of list item */
    para_LcontPop,		       /* end continuation of list item */
    para_QuotePush,		       /* begin block quote */
    para_QuotePop,		       /* end block quote */
    /*
     * Back ends may define their own paragraph types beyond here,
     * in case they need to use them internally.
     */
    para_NotParaType		       /* placeholder value */
};
131 
/*
 * Data structure to hold an individual word
 */
struct word_Tag {
    word *next, *alt;
    int type;			       /* presumably one of the word_*
                                        * values -- TODO confirm
                                        */
    int aux;			       /* presumably the attr_* / quote_*
                                        * aux values -- TODO confirm
                                        */
    int breaks;			       /* can a line break after it? */
    wchar_t *text;
    filepos fpos;

    void *private_data; 	       /* for temp use in backends */
};
/*
 * Word types. No enumerator has an explicit value, so they are
 * numbered consecutively starting from word_Normal == 0. The three
 * groups inside the ORDERING CONSTRAINT each list the same styles in
 * the same order, so corresponding entries in different groups
 * differ by a multiple of the group size.
 */
enum {
    /* ORDERING CONSTRAINT: these normal-word types ... */
    word_Normal,
    word_Emph,
    word_Strong,
    word_Code,			       /* monospaced; `quoted' in text */
    word_WeakCode,		       /* monospaced, normal in text */
    /* ... must be in the same order as these space types ... */
    word_WhiteSpace,		       /* text is NULL or ignorable */
    word_EmphSpace,		       /* WhiteSpace when emphasised */
    word_StrongSpace,		       /* WhiteSpace when strong */
    word_CodeSpace,		       /* WhiteSpace when code */
    word_WkCodeSpace,		       /* WhiteSpace when weak code */
    /* ... and must be in the same order as these quote types ... */
    word_Quote,			       /* text is NULL or ignorable */
    word_EmphQuote,		       /* Quote when emphasised */
    word_StrongQuote,		       /* Quote when strong */
    word_CodeQuote,		       /* (can't happen) */
    word_WkCodeQuote,		       /* (can't happen) */
    /* END ORDERING CONSTRAINT */
    word_internal_endattrs,
    word_UpperXref,		       /* \K */
    word_LowerXref,		       /* \k */
    word_XrefEnd,		       /* (invisible; no text) */
    word_IndexRef,		       /* (always an invisible one) */
    word_HyperLink,		       /* (invisible) */
    word_HyperEnd,		       /* (also invisible; no text) */
    /*
     * Back ends may define their own word types beyond here, in
     * case they need to use them internally.
     */
    word_NotWordType		       /* placeholder value */
};
/*
 * aux values for attributed words. These occupy the low two bits of
 * the value; attr_mask selects those bits.
 */
enum {
    attr_Only   = 0x0000,	       /* a lone word with the attribute */
    attr_First  = 0x0001,	       /* the first of a series */
    attr_Last   = 0x0002,	       /* the last of a series */
    attr_Always	= 0x0003,	       /* any other part of a series */
    attr_mask   = 0x0003
};
/*
 * aux values for quote-type words. These use bits 0x0010 and 0x0020;
 * quote_mask selects both.
 */
enum {
    quote_Open  = 0x0010,
    quote_Close = 0x0020,
    quote_mask  = 0x0030
};
/*
 * Arithmetic on word types, relying on the ordering constraint in
 * the word-type enumeration.
 *
 * NATTRS        size of one style group (normal words, spaces, quotes)
 * isvis(x)      x lies in the range word_Normal .. word_LowerXref
 * isattr(x)     x lies strictly between word_Normal and
 *               word_internal_endattrs and is not word_WhiteSpace
 * sameattr(x,y) x and y sit at the same offset within their groups
 * towordstyle / tospacestyle / toquotestyle
 *               same offset as x, in the named group
 * removeattr(x) first member of the group that contains x
 *
 * Arguments may be evaluated more than once, so they must not have
 * side effects.
 */
#define NATTRS (word_WhiteSpace - word_Normal)
#define isvis(x) ( (x) >= word_Normal && (x) <= word_LowerXref )
#define isattr(x) ( (x) > word_Normal && (x) < word_internal_endattrs && \
                    (x) != word_WhiteSpace )
#define sameattr(x,y) ( ((x) - (y)) % NATTRS == 0 )
#define towordstyle(x) ( (x) % NATTRS + word_Normal )
#define tospacestyle(x) ( (x) % NATTRS + word_WhiteSpace )
#define toquotestyle(x) ( (x) % NATTRS + word_Quote )
#define removeattr(x) ( (x) / NATTRS * NATTRS + word_Normal )

/* Extract the attribute bits / the quote bits from a word's aux value */
#define attraux(x) ( attr_mask & (x) )
#define quoteaux(x) ( quote_mask & (x) )
204 
/*
 * error.c
 *
 * One function per diagnostic. The comment above each prototype
 * gives the gist of the message. Parameters named `fpos' are file
 * positions, `sp' parameters are char strings and `wsp' parameters
 * are wide strings. fatalerr_nomemory is declared NORETURN; the
 * others are ordinary functions.
 *
 * NOTE(review): the descriptions on err_kwexpected and
 * err_kwillegal look swapped relative to the function names --
 * confirm against the message text in error.c.
 */
/* out of memory */
void fatalerr_nomemory(void) NORETURN;
/* option `-%s' requires an argument */
void err_optnoarg(const char *sp);
/* unrecognised option `-%s' */
void err_nosuchopt(const char *sp);
/* unrecognised charset %s (cmdline) */
void err_cmdcharset(const char *sp);
/* futile option `-%s'%s */
void err_futileopt(const char *sp, const char *sp2);
/* no input files */
void err_noinput(void);
/* unable to open input file `%s' */
void err_cantopen(const char *sp);
/* no data in input files */
void err_nodata(void);
/* line in codepara didn't begin `\c' */
void err_brokencodepara(const filepos *fpos);
/* expected `}' after keyword */
void err_kwunclosed(const filepos *fpos);
/* paragraph type expects no keyword */
void err_kwexpected(const filepos *fpos);
/* paragraph type expects a keyword */
void err_kwillegal(const filepos *fpos);
/* paragraph type expects only 1 */
void err_kwtoomany(const filepos *fpos);
/* paragraph type expects only kws! */
void err_bodyillegal(const filepos *fpos);
/* invalid command at start of para */
void err_badparatype(const wchar_t *wsp, const filepos *fpos);
/* invalid command in mid-para */
void err_badmidcmd(const wchar_t *wsp, const filepos *fpos);
/* unexpected brace */
void err_unexbrace(const filepos *fpos);
/* expected `{' after command */
void err_explbr(const filepos *fpos);
/* EOF inside braced comment */
void err_commenteof(const filepos *fpos);
/* expected `}' after cross-ref */
void err_kwexprbr(const filepos *fpos);
/* \q within \c is not supported */
void err_codequote(const filepos *fpos);
/* unclosed braces at end of para */
void err_missingrbrace(const filepos *fpos);
/* unclosed braces at end of file */
void err_missingrbrace2(const filepos *fpos);
/* unable to nest text styles */
void err_nestedstyles(const filepos *fpos);
/* unable to nest `\i' thingys */
void err_nestedindex(const filepos *fpos);
/* two \i differing only in case */
void err_indexcase(const filepos *fpos, const wchar_t *wsp,
                   const filepos *fpos2, const wchar_t *wsp2);
/* unresolved cross-reference */
void err_nosuchkw(const filepos *fpos, const wchar_t *wsp);
/* multiple \BRs on same keyword */
void err_multiBR(const filepos *fpos, const wchar_t *wsp);
/* \IM on unknown index tag (warning) */
void err_nosuchidxtag(const filepos *fpos, const wchar_t *wsp);
/* can't open output file for write */
void err_cantopenw(const char *sp);
/* this macro already exists */
void err_macroexists(const filepos *fpos, const wchar_t *wsp);
/* jump a heading level, eg \C -> \S */
void err_sectjump(const filepos *fpos);
/* WinHelp context ID hash clash */
void err_winhelp_ctxclash(const filepos *fpos, const char *sp, const char *sp2);
/* keyword clash in sections */
void err_multikw(const filepos *fpos, const filepos *fpos2, const wchar_t *wsp);
/* \lcont not after a list item */
void err_misplacedlcont(const filepos *fpos);
/* section marker appeared in block */
void err_sectmarkerinblock(const filepos *fpos, const char *sp);
/* \cfg{%s} insufficient args (<%d) */
void err_cfginsufarg(const filepos *fpos, const char *sp, int i);
/* colon/comma in node name in info */
void err_infonodechar(const filepos *fpos, char c) /* fpos might be NULL */;
/* \c line too long in text backend */
void err_text_codeline(const filepos *fpos, int i, int j);
/* unrecognised HTML version keyword */
void err_htmlver(const filepos *fpos, const wchar_t *wsp);
/* unrecognised character set name */
void err_charset(const filepos *fpos, const wchar_t *wsp);
/* unrecognised font name */
void err_nofont(const filepos *fpos, const wchar_t *wsp);
/* eof in AFM file */
void err_afmeof(const filepos *fpos);
/* missing expected keyword in AFM */
void err_afmkey(const filepos *fpos, const char *sp);
/* unsupported AFM version */
void err_afmvers(const filepos *fpos);
/* missing value(s) for AFM key */
void err_afmval(const filepos *fpos, const char *sp, int i);
/* eof in Type 1 font file */
void err_pfeof(const filepos *fpos);
/* bad Type 1 header line */
void err_pfhead(const filepos *fpos);
/* otherwise invalid Type 1 font */
void err_pfbad(const filepos *fpos);
/* Type 1 font but no AFM */
void err_pfnoafm(const filepos *fpos, const char *sp);
/* need both or neither of hhp+chm */
void err_chmnames(void);
/* required sfnt table missing */
void err_sfntnotable(const filepos *fpos, const char *sp);
/* sfnt has no PostScript name */
void err_sfntnopsname(const filepos *fpos);
/* sfnt table not valid */
void err_sfntbadtable(const filepos *fpos, const char *sp);
/* sfnt has no UCS-2 cmap */
void err_sfntnounicmap(const filepos *fpos);
/* sfnt table version unknown */
void err_sfnttablevers(const filepos *fpos, const char *sp);
/* sfnt has bad header */
void err_sfntbadhdr(const filepos *fpos);
/* sfnt cmap references bad glyph */
void err_sfntbadglyph(const filepos *fpos, unsigned wc);
/* CHM internal file names can't start with # or $ */
void err_chm_badname(const filepos *fpos, const char *sp);
327 
/*
 * malloc.c
 *
 * Allocation wrappers. When LOGALLOC is defined, the three
 * allocator functions take an extra file name and line number, and
 * the function-like macros of the same names supply __FILE__ and
 * __LINE__ so that call sites are written identically in both
 * configurations.
 *
 * NOTE(review): sizes are passed as int, while the snew/snewn/
 * sresize macros compute them with sizeof (a size_t) -- large
 * requests would be narrowed on conversion.
 */
#ifdef LOGALLOC
void *smalloc(char *file, int line, int size);
void *srealloc(char *file, int line, void *p, int size);
void sfree(char *file, int line, void *p);
#define smalloc(x) smalloc(__FILE__, __LINE__, x)
#define srealloc(x, y) srealloc(__FILE__, __LINE__, x, y)
#define sfree(x) sfree(__FILE__, __LINE__, x)
#else
void *smalloc(int size);
void *srealloc(void *p, int size);
void sfree(void *p);
#endif
/* Helpers for word lists, paragraph lists and strings; presumably
 * free / duplicate whole lists as the names suggest -- TODO confirm
 * in malloc.c */
void free_word_list(word *w);
void free_para_list(paragraph *p);
word *dup_word_list(word *w);
char *dupstr(char const *s);
347 
/*
 * Typed front ends to smalloc/srealloc, plus an array-length helper.
 *
 * snew(type)                 allocate room for one `type'
 * snewn(number, type)        allocate room for `number' of them
 * sresize(array, number, type)
 *                            resize `array' to hold `number' of them
 * lenof(array)               element count of a true array (not a
 *                            pointer)
 *
 * The allocating macros cast the result to `type *'.
 */
#define snew(type) \
	( (type *) smalloc (sizeof (type)) )
#define snewn(number, type) \
	( (type *) smalloc (sizeof (type) * (number)) )
#define sresize(array, number, type) \
	( (type *) srealloc ((array), sizeof (type) * (number)) )
#define lenof(array) \
	( sizeof (array) / sizeof ((array)[0]) )
353 
/*
 * ustring.c
 *
 * Helpers for wide-character (wchar_t) strings, including
 * conversions to and from char strings. The `charset' arguments are
 * ints -- presumably character set identifiers from charset.h; TODO
 * confirm there.
 *
 * NOTE(review): ustrcmp takes non-const pointers, unlike ustricmp
 * and ustrnicmp.
 */
wchar_t *ustrdup(wchar_t const *s);
char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset);
char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset);
wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset);
char *utoa_dup(wchar_t const *s, int charset);
char *utoa_dup_len(wchar_t const *s, int charset, int *len);
char *utoa_careful_dup(wchar_t const *s, int charset);
wchar_t *ufroma_dup(char const *s, int charset);
char *utoa_locale_dup(wchar_t const *s);
wchar_t *ufroma_locale_dup(char const *s);
int ustrlen(wchar_t const *s);
wchar_t *uadv(wchar_t *s);
wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source);
wchar_t *ustrncpy(wchar_t *dest, wchar_t const *source, int n);
wchar_t utolower(wchar_t);
int uisalpha(wchar_t);
int ustrcmp(wchar_t *lhs, wchar_t *rhs);
int ustricmp(wchar_t const *lhs, wchar_t const *rhs);
int ustrnicmp(wchar_t const *lhs, wchar_t const *rhs, int maxlen);
int utoi(wchar_t const *);
double utof(wchar_t const *);
int utob(wchar_t const *);
int uisdigit(wchar_t);
wchar_t *ustrlow(wchar_t *s);
wchar_t *ustrftime(const wchar_t *wfmt, const struct tm *timespec);
int cvt_ok(int charset, const wchar_t *s);
int charset_from_ustr(filepos *fpos, const wchar_t *name);
384 
/*
 * wcwidth.c
 *
 * Width of a char string / wide string; the char-string variant
 * also takes the character set the string is in.
 * NOTE(review): presumably display width in character cells --
 * confirm in wcwidth.c.
 */
int strwid(char const *s, int charset);
int ustrwid(wchar_t const *s, int charset);
390 
/*
 * help.c
 *
 * Parameterless entry points returning nothing.
 * NOTE(review): presumably each prints the information named to the
 * user -- confirm in help.c.
 */
void help(void);
void usage(void);
void showversion(void);
void listcharsets(void);

/*
 * licence.c
 */
void licence(void);

/*
 * version.c
 */
/* Constant pointer to constant chars; only declared here, defined
 * elsewhere. */
extern const char *const version;
408 
/*
 * misc.c
 */
char *adv(char *s);

/*
 * Stack of void pointers. `stack' is itself a pointer type, to a
 * structure that is not defined in this header, so values of type
 * `stack' are passed directly to the stk_* functions.
 */
typedef struct stackTag *stack;
stack stk_new(void);
void stk_free(stack);
void stk_push(stack, void *);
void *stk_pop(stack);
void *stk_top(stack);
420 
/*
 * rdstring (wide characters) and rdstringc (chars): a text pointer
 * together with `pos' and `size' counters, plus functions to append
 * a single character or a string and to trim the result. The
 * empty_rdstring / empty_rdstringc constants are declared here and
 * defined elsewhere.
 * NOTE(review): presumably pos is the current length and size the
 * allocated capacity of a growable buffer -- confirm in misc.c.
 */
typedef struct tagRdstring rdstring;
struct tagRdstring {
    int pos, size;
    wchar_t *text;
};
typedef struct tagRdstringc rdstringc;
struct tagRdstringc {
    int pos, size;
    char *text;
};
extern const rdstring empty_rdstring;
extern const rdstringc empty_rdstringc;
void rdadd(rdstring *rs, wchar_t c);
void rdadds(rdstring *rs, wchar_t const *p);
wchar_t *rdtrim(rdstring *rs);
void rdaddc(rdstringc *rs, char c);
void rdaddsc(rdstringc *rs, char const *p);
void rdaddsn(rdstringc *rc, char const *p, int len);
char *rdtrimc(rdstringc *rs);
440 
/* Word-list helpers (defined in misc.c, per the section heading) */
int compare_wordlists(word *a, word *b);

void mark_attr_ends(word *words);

/*
 * Result of wrapping a paragraph: a linked list with one entry per
 * output line.
 */
typedef struct tagWrappedLine wrappedline;
struct tagWrappedLine {
    wrappedline *next;
    word *begin, *end;		       /* first & last words of line */
    int nspaces;		       /* number of whitespaces in line */
    int shortfall;		       /* how much shorter than max width */
};
/*
 * wrap_para takes a word list, three ints, and a callback with its
 * void * context argument.
 * NOTE(review): the ints are presumably widths/indent and the
 * callback presumably measures a word -- the parameters are unnamed
 * here, so confirm against misc.c.
 */
wrappedline *wrap_para(word *, int, int, int (*)(void *, word *), void *, int);
void wrap_free(wrappedline *);
/* Building configuration paragraphs; cmdline_cfg_simple is variadic.
 * NOTE(review): presumably used for command-line options -- confirm. */
void cmdline_cfg_add(paragraph *cfg, char *string);
paragraph *cmdline_cfg_new(void);
paragraph *cmdline_cfg_simple(char *string, ...);
457 
/*
 * input.c
 */
/* Takes the input state and the index data; returns a paragraph
 * pointer (paragraphs form a linked list). */
paragraph *read_input(input *in, indexdata *idx);

/*
 * in_afm.c
 */
/* The font-file readers below take the same input state and return
 * nothing. */
void read_afm_file(input *in);

/*
 * in_pf.c
 */
void read_pfa_file(input *in);
void read_pfb_file(input *in);

/*
 * in_sfnt.c
 */
void read_sfnt_file(input *in);
478 
/*
 * keywords.c
 */
/* The set of keywords, held in a tree sorted by key, plus a separate
 * array of "loose end" words. */
struct keywordlist_Tag {
    int nkeywords;
    int size;
    tree234 *keys;		       /* sorted by `key' field */
    word **looseends;		       /* non-keyword list element numbers */
    int nlooseends;
    int looseendssize;
};
/* One keyword: its name, the text it stands for, and the paragraph
 * it refers to. */
struct keyword_Tag {
    wchar_t *key;		       /* the keyword itself */
    word *text;			       /* "Chapter 2", "Appendix Q"... */
    				       /* (NB: filepos are not set) */
    paragraph *para;		       /* the paragraph referenced */
};
keyword *kw_lookup(keywordlist *, wchar_t *);
keywordlist *get_keywords(paragraph *);
void free_keywords(keywordlist *);
void subst_keywords(paragraph *, keywordlist *);
500 
/*
 * index.c
 */

/*
 * Data structure to hold both sides of the index.
 */
struct indexdata_Tag {
    tree234 *tags;		       /* holds type `indextag' */
    tree234 *entries;		       /* holds type `indexentry' */
};

/*
 * Data structure to hold an index tag (LHS of index).
 */
struct indextag_Tag {
    wchar_t *name;
    word *implicit_text;
    filepos implicit_fpos;
    word **explicit_texts;
    filepos *explicit_fpos;
    /* presumably entries in use / allocated in the explicit_* arrays
     * -- TODO confirm in index.c */
    int nexplicit, explicit_size;
    int nrefs;
    indexentry **refs;		       /* array of entries referenced by tag */
};

/*
 * Data structure to hold an index entry (RHS of index).
 */
struct indexentry_Tag {
    word *text;
    void *backend_data;		       /* private to back end */
    filepos fpos;
};

indexdata *make_index(void);
void cleanup_index(indexdata *);
/* index_merge takes responsibility for freeing arg 3 iff implicit; never
 * takes responsibility for arg 2 */
/* NOTE(review): the argument numbering above is ambiguous (0- or
 * 1-based) -- confirm which of the wchar_t * and word * arguments is
 * meant before relying on it. */
void index_merge(indexdata *, int is_explicit, wchar_t *, word *, filepos *);
void build_index(indexdata *);
void index_debug(indexdata *);
indextag *index_findtag(indexdata *idx, wchar_t *name);
544 
/*
 * contents.c
 */
/* numberstate is not defined in this header, so it is used only
 * through pointers: number_init returns one and number_free takes
 * one. */
numberstate *number_init(void);
void number_cfg(numberstate *, paragraph *);
word *number_mktext(numberstate *, paragraph *, wchar_t *, int *, int *);
void number_free(numberstate *);

/*
 * biblio.c
 */
void gen_citations(paragraph *, keywordlist *);
557 
/*
 * Back ends.
 *
 * Every *_backend function below has the same signature: the
 * paragraph list, the keyword list, the index data and a void *.
 * Each *_config_filename function takes a file name and returns a
 * paragraph pointer.
 * NOTE(review): the void * is presumably the value returned by
 * paper_pre_backend, for the back ends that need it -- confirm at
 * the call site.
 */

/*
 * bk_text.c
 */
void text_backend(paragraph *, keywordlist *, indexdata *, void *);
paragraph *text_config_filename(char *filename);

/*
 * bk_html.c
 */
void html_backend(paragraph *, keywordlist *, indexdata *, void *);
void chm_backend(paragraph *, keywordlist *, indexdata *, void *);
paragraph *html_config_filename(char *filename);
paragraph *chm_config_filename(char *filename);

/*
 * bk_whlp.c
 */
void whlp_backend(paragraph *, keywordlist *, indexdata *, void *);
paragraph *whlp_config_filename(char *filename);

/*
 * bk_man.c
 */
void man_backend(paragraph *, keywordlist *, indexdata *, void *);
paragraph *man_config_filename(char *filename);

/*
 * bk_info.c
 */
void info_backend(paragraph *, keywordlist *, indexdata *, void *);
paragraph *info_config_filename(char *filename);

/*
 * bk_paper.c
 */
/* Unlike the *_backend functions, this takes no void * argument and
 * returns one instead. */
void *paper_pre_backend(paragraph *, keywordlist *, indexdata *);
void listfonts(void);

/*
 * bk_ps.c
 */
void ps_backend(paragraph *, keywordlist *, indexdata *, void *);
paragraph *ps_config_filename(char *filename);

/*
 * bk_pdf.c
 */
void pdf_backend(paragraph *, keywordlist *, indexdata *, void *);
paragraph *pdf_config_filename(char *filename);
607 
608 #endif
609