1 #ifndef HALIBUT_HALIBUT_H 2 #define HALIBUT_HALIBUT_H 3 4 #include <stdio.h> 5 #include <wchar.h> 6 #include <time.h> 7 #include <string.h> 8 9 #include "charset.h" 10 11 #ifdef __GNUC__ 12 #define NORETURN __attribute__((__noreturn__)) 13 #else 14 #define NORETURN /* nothing */ 15 #endif 16 17 #ifndef TRUE 18 #define TRUE 1 19 #endif 20 #ifndef FALSE 21 #define FALSE 0 22 #endif 23 24 /* For suppressing unused-parameter warnings */ 25 #define IGNORE(x) ( (x) = (x) ) 26 27 #include "tree234.h" 28 29 /* 30 * Structure tags 31 */ 32 typedef struct input_Tag input; 33 typedef struct filepos_Tag filepos; 34 typedef struct paragraph_Tag paragraph; 35 typedef struct word_Tag word; 36 typedef struct keywordlist_Tag keywordlist; 37 typedef struct keyword_Tag keyword; 38 typedef struct numberstate_Tag numberstate; 39 typedef struct indexdata_Tag indexdata; 40 typedef struct indextag_Tag indextag; 41 typedef struct indexentry_Tag indexentry; 42 typedef struct macrostack_Tag macrostack; 43 44 /* 45 * Data structure to hold a file name and index, a line and a 46 * column number, for reporting errors 47 */ 48 struct filepos_Tag { 49 char *filename; 50 int line, col; 51 }; 52 53 /* 54 * Data structure to hold all the file names etc for input 55 */ 56 typedef struct pushback_Tag { 57 int chr; 58 filepos pos; 59 } pushback; 60 struct input_Tag { 61 char **filenames; /* complete list of input files */ 62 int nfiles; /* how many in the list */ 63 FILE *currfp; /* the currently open one */ 64 int currindex; /* which one is that in the list */ 65 int wantclose; /* does the current file want closing */ 66 pushback *pushback; /* pushed-back input characters */ 67 int npushback, pushbacksize; 68 filepos pos; 69 int reportcols; /* report column numbers in errors */ 70 macrostack *stack; /* macro expansions in force */ 71 int defcharset, charset; /* character sets for input files */ 72 charset_state csstate; 73 wchar_t wc[16]; /* wide chars from input conversion */ 74 int nwc, wcpos; /* size of, and position in, wc[] */ 75 char *pushback_chars; /* used to save input-encoding data */ 76 }; 77 78 /* 79 * Data structure to hold the input form of the source, ie a linked 80 * list of paragraphs 81 */ 82 struct paragraph_Tag { 83 paragraph *next; 84 int type; 85 wchar_t *keyword; /* for most special paragraphs */ 86 char *origkeyword; /* same again in original charset */ 87 word *words; /* list of words in paragraph */ 88 int aux; /* number, in a numbered paragraph 89 * or subsection level 90 */ 91 word *kwtext; /* chapter/section indication */ 92 word *kwtext2; /* numeric-only form of kwtext */ 93 filepos fpos; 94 95 paragraph *parent, *child, *sibling; /* for hierarchy navigation */ 96 97 void *private_data; /* for temp use in backends */ 98 }; 99 enum { 100 para_IM, /* index merge */ 101 para_BR, /* bibliography rewrite */ 102 para_Rule, /* random horizontal rule */ 103 para_Chapter, 104 para_Appendix, 105 para_UnnumberedChapter, 106 para_Heading, 107 para_Subsect, 108 para_Normal, 109 para_Biblio, /* causes no output unless turned ... */ 110 para_BiblioCited, /* ... into this paragraph type */ 111 para_Bullet, 112 para_NumberedList, 113 para_DescribedThing, 114 para_Description, 115 para_Code, 116 para_Copyright, 117 para_NoCite, 118 para_Title, 119 para_VersionID, 120 para_Config, /* configuration directive */ 121 para_LcontPush, /* begin continuation of list item */ 122 para_LcontPop, /* end continuation of list item */ 123 para_QuotePush, /* begin block quote */ 124 para_QuotePop, /* end block quote */ 125 /* 126 * Back ends may define their own paragraph types beyond here, 127 * in case they need to use them internally. 128 */ 129 para_NotParaType /* placeholder value */ 130 }; 131 132 /* 133 * Data structure to hold an individual word 134 */ 135 struct word_Tag { 136 word *next, *alt; 137 int type; 138 int aux; 139 int breaks; /* can a line break after it? */ 140 wchar_t *text; 141 filepos fpos; 142 143 void *private_data; /* for temp use in backends */ 144 }; 145 enum { 146 /* ORDERING CONSTRAINT: these normal-word types ... */ 147 word_Normal, 148 word_Emph, 149 word_Strong, 150 word_Code, /* monospaced; `quoted' in text */ 151 word_WeakCode, /* monospaced, normal in text */ 152 /* ... must be in the same order as these space types ... */ 153 word_WhiteSpace, /* text is NULL or ignorable */ 154 word_EmphSpace, /* WhiteSpace when emphasised */ 155 word_StrongSpace, /* WhiteSpace when strong */ 156 word_CodeSpace, /* WhiteSpace when code */ 157 word_WkCodeSpace, /* WhiteSpace when weak code */ 158 /* ... and must be in the same order as these quote types ... */ 159 word_Quote, /* text is NULL or ignorable */ 160 word_EmphQuote, /* Quote when emphasised */ 161 word_StrongQuote, /* Quote when strong */ 162 word_CodeQuote, /* (can't happen) */ 163 word_WkCodeQuote, /* (can't happen) */ 164 /* END ORDERING CONSTRAINT */ 165 word_internal_endattrs, 166 word_UpperXref, /* \K */ 167 word_LowerXref, /* \k */ 168 word_XrefEnd, /* (invisible; no text) */ 169 word_IndexRef, /* (always an invisible one) */ 170 word_HyperLink, /* (invisible) */ 171 word_HyperEnd, /* (also invisible; no text) */ 172 /* 173 * Back ends may define their own word types beyond here, in 174 * case they need to use them internally. 175 */ 176 word_NotWordType /* placeholder value */ 177 }; 178 /* aux values for attributed words */ 179 enum { 180 attr_Only = 0x0000, /* a lone word with the attribute */ 181 attr_First = 0x0001, /* the first of a series */ 182 attr_Last = 0x0002, /* the last of a series */ 183 attr_Always = 0x0003, /* any other part of a series */ 184 attr_mask = 0x0003 185 }; 186 /* aux values for quote-type words */ 187 enum { 188 quote_Open = 0x0010, 189 quote_Close = 0x0020, 190 quote_mask = 0x0030 191 }; 192 #define isvis(x) ( ( (x) >= word_Normal && (x) <= word_LowerXref ) ) 193 #define isattr(x) ( ( (x) > word_Normal && (x) < word_WhiteSpace ) || \ 194 ( (x) > word_WhiteSpace && (x) < word_internal_endattrs ) ) 195 #define NATTRS (word_WhiteSpace - word_Normal) 196 #define sameattr(x,y) ( (((x)-(y)) % NATTRS) == 0 ) 197 #define towordstyle(x) ( word_Normal + ((x) % NATTRS) ) 198 #define tospacestyle(x) ( word_WhiteSpace + ((x) % NATTRS) ) 199 #define toquotestyle(x) ( word_Quote + ((x) % NATTRS) ) 200 #define removeattr(x) ( word_Normal + ((x)/NATTRS * NATTRS) ) 201 202 #define attraux(x) ( (x) & attr_mask ) 203 #define quoteaux(x) ( (x) & quote_mask ) 204 205 /* 206 * error.c 207 */ 208 /* out of memory */ 209 void fatalerr_nomemory(void) NORETURN; 210 /* option `-%s' requires an argument */ 211 void err_optnoarg(const char *sp); 212 /* unrecognised option `-%s' */ 213 void err_nosuchopt(const char *sp); 214 /* unrecognised charset %s (cmdline) */ 215 void err_cmdcharset(const char *sp); 216 /* futile option `-%s'%s */ 217 void err_futileopt(const char *sp, const char *sp2); 218 /* no input files */ 219 void err_noinput(void); 220 /* unable to open input file `%s' */ 221 void err_cantopen(const char *sp); 222 /* no data in input files */ 223 void err_nodata(void); 224 /* line in codepara didn't begin `\c' */ 225 void err_brokencodepara(const filepos *fpos); 226 /* expected `}' after keyword */ 227 void err_kwunclosed(const filepos *fpos); 228 /* paragraph type expects no keyword */ 229 void err_kwexpected(const filepos *fpos); 230 /* paragraph type expects a keyword */ 231 void err_kwillegal(const filepos *fpos); 232 /* paragraph type expects only 1 */ 233 void err_kwtoomany(const filepos *fpos); 234 /* paragraph type expects only kws! */ 235 void err_bodyillegal(const filepos *fpos); 236 /* invalid command at start of para */ 237 void err_badparatype(const wchar_t *wsp, const filepos *fpos); 238 /* invalid command in mid-para */ 239 void err_badmidcmd(const wchar_t *wsp, const filepos *fpos); 240 /* unexpected brace */ 241 void err_unexbrace(const filepos *fpos); 242 /* expected `{' after command */ 243 void err_explbr(const filepos *fpos); 244 /* EOF inside braced comment */ 245 void err_commenteof(const filepos *fpos); 246 /* expected `}' after cross-ref */ 247 void err_kwexprbr(const filepos *fpos); 248 /* \q within \c is not supported */ 249 void err_codequote(const filepos *fpos); 250 /* unclosed braces at end of para */ 251 void err_missingrbrace(const filepos *fpos); 252 /* unclosed braces at end of file */ 253 void err_missingrbrace2(const filepos *fpos); 254 /* unable to nest text styles */ 255 void err_nestedstyles(const filepos *fpos); 256 /* unable to nest `\i' thingys */ 257 void err_nestedindex(const filepos *fpos); 258 /* two \i differing only in case */ 259 void err_indexcase(const filepos *fpos, const wchar_t *wsp, 260 const filepos *fpos2, const wchar_t *wsp2); 261 /* unresolved cross-reference */ 262 void err_nosuchkw(const filepos *fpos, const wchar_t *wsp); 263 /* multiple \BRs on same keyword */ 264 void err_multiBR(const filepos *fpos, const wchar_t *wsp); 265 /* \IM on unknown index tag (warning) */ 266 void err_nosuchidxtag(const filepos *fpos, const wchar_t *wsp); 267 /* can't open output file for write */ 268 void err_cantopenw(const char *sp); 269 /* this macro already exists */ 270 void err_macroexists(const filepos *fpos, const wchar_t *wsp); 271 /* jump a heading level, eg \C -> \S */ 272 void err_sectjump(const filepos *fpos); 273 /* WinHelp context ID hash clash */ 274 void err_winhelp_ctxclash(const filepos *fpos, const char *sp, const char *sp2); 275 /* keyword clash in sections */ 276 void err_multikw(const filepos *fpos, const filepos *fpos2, const wchar_t *wsp); 277 /* \lcont not after a list item */ 278 void err_misplacedlcont(const filepos *fpos); 279 /* section marker appeared in block */ 280 void err_sectmarkerinblock(const filepos *fpos, const char *sp); 281 /* \cfg{%s} insufficient args (<%d) */ 282 void err_cfginsufarg(const filepos *fpos, const char *sp, int i); 283 /* colon/comma in node name in info */ 284 void err_infonodechar(const filepos *fpos, char c) /* fpos might be NULL */; 285 /* \c line too long in text backend */ 286 void err_text_codeline(const filepos *fpos, int i, int j); 287 /* unrecognised HTML version keyword */ 288 void err_htmlver(const filepos *fpos, const wchar_t *wsp); 289 /* unrecognised character set name */ 290 void err_charset(const filepos *fpos, const wchar_t *wsp); 291 /* unrecognised font name */ 292 void err_nofont(const filepos *fpos, const wchar_t *wsp); 293 /* eof in AFM file */ 294 void err_afmeof(const filepos *fpos); 295 /* missing expected keyword in AFM */ 296 void err_afmkey(const filepos *fpos, const char *sp); 297 /* unsupported AFM version */ 298 void err_afmvers(const filepos *fpos); 299 /* missing value(s) for AFM key */ 300 void err_afmval(const filepos *fpos, const char *sp, int i); 301 /* eof in Type 1 font file */ 302 void err_pfeof(const filepos *fpos); 303 /* bad Type 1 header line */ 304 void err_pfhead(const filepos *fpos); 305 /* otherwise invalide Type 1 font */ 306 void err_pfbad(const filepos *fpos); 307 /* Type 1 font but no AFM */ 308 void err_pfnoafm(const filepos *fpos, const char *sp); 309 /* need both or neither of hhp+chm */ 310 void err_chmnames(void); 311 /* required sfnt table missing */ 312 void err_sfntnotable(const filepos *fpos, const char *sp); 313 /* sfnt has no PostScript name */ 314 void err_sfntnopsname(const filepos *fpos); 315 /* sfnt table not valid */ 316 void err_sfntbadtable(const filepos *fpos, const char *sp); 317 /* sfnt has no UCS-2 cmap */ 318 void err_sfntnounicmap(const filepos *fpos); 319 /* sfnt table version unknown */ 320 void err_sfnttablevers(const filepos *fpos, const char *sp); 321 /* sfnt has bad header */ 322 void err_sfntbadhdr(const filepos *fpos); 323 /* sfnt cmap references bad glyph */ 324 void err_sfntbadglyph(const filepos *fpos, unsigned wc); 325 /* CHM internal file names can't start with # or $ */ 326 void err_chm_badname(const filepos *fpos, const char *sp); 327 328 /* 329 * malloc.c 330 */ 331 #ifdef LOGALLOC 332 void *smalloc(char *file, int line, int size); 333 void *srealloc(char *file, int line, void *p, int size); 334 void sfree(char *file, int line, void *p); 335 #define smalloc(x) smalloc(__FILE__, __LINE__, x) 336 #define srealloc(x, y) srealloc(__FILE__, __LINE__, x, y) 337 #define sfree(x) sfree(__FILE__, __LINE__, x) 338 #else 339 void *smalloc(int size); 340 void *srealloc(void *p, int size); 341 void sfree(void *p); 342 #endif 343 void free_word_list(word *w); 344 void free_para_list(paragraph *p); 345 word *dup_word_list(word *w); 346 char *dupstr(char const *s); 347 348 #define snew(type) ( (type *) smalloc (sizeof (type)) ) 349 #define snewn(number, type) ( (type *) smalloc ((number) * sizeof (type)) ) 350 #define sresize(array, number, type) \ 351 ( (type *) srealloc ((array), (number) * sizeof (type)) ) 352 #define lenof(array) ( sizeof(array) / sizeof(*(array)) ) 353 354 /* 355 * ustring.c 356 */ 357 wchar_t *ustrdup(wchar_t const *s); 358 char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset); 359 char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset); 360 wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset); 361 char *utoa_dup(wchar_t const *s, int charset); 362 char *utoa_dup_len(wchar_t const *s, int charset, int *len); 363 char *utoa_careful_dup(wchar_t const *s, int charset); 364 wchar_t *ufroma_dup(char const *s, int charset); 365 char *utoa_locale_dup(wchar_t const *s); 366 wchar_t *ufroma_locale_dup(char const *s); 367 int ustrlen(wchar_t const *s); 368 wchar_t *uadv(wchar_t *s); 369 wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source); 370 wchar_t *ustrncpy(wchar_t *dest, wchar_t const *source, int n); 371 wchar_t utolower(wchar_t); 372 int uisalpha(wchar_t); 373 int ustrcmp(wchar_t *lhs, wchar_t *rhs); 374 int ustricmp(wchar_t const *lhs, wchar_t const *rhs); 375 int ustrnicmp(wchar_t const *lhs, wchar_t const *rhs, int maxlen); 376 int utoi(wchar_t const *); 377 double utof(wchar_t const *); 378 int utob(wchar_t const *); 379 int uisdigit(wchar_t); 380 wchar_t *ustrlow(wchar_t *s); 381 wchar_t *ustrftime(const wchar_t *wfmt, const struct tm *timespec); 382 int cvt_ok(int charset, const wchar_t *s); 383 int charset_from_ustr(filepos *fpos, const wchar_t *name); 384 385 /* 386 * wcwidth.c 387 */ 388 int strwid(char const *s, int charset); 389 int ustrwid(wchar_t const *s, int charset); 390 391 /* 392 * help.c 393 */ 394 void help(void); 395 void usage(void); 396 void showversion(void); 397 void listcharsets(void); 398 399 /* 400 * licence.c 401 */ 402 void licence(void); 403 404 /* 405 * version.c 406 */ 407 extern const char *const version; 408 409 /* 410 * misc.c 411 */ 412 char *adv(char *s); 413 414 typedef struct stackTag *stack; 415 stack stk_new(void); 416 void stk_free(stack); 417 void stk_push(stack, void *); 418 void *stk_pop(stack); 419 void *stk_top(stack); 420 421 typedef struct tagRdstring rdstring; 422 struct tagRdstring { 423 int pos, size; 424 wchar_t *text; 425 }; 426 typedef struct tagRdstringc rdstringc; 427 struct tagRdstringc { 428 int pos, size; 429 char *text; 430 }; 431 extern const rdstring empty_rdstring; 432 extern const rdstringc empty_rdstringc; 433 void rdadd(rdstring *rs, wchar_t c); 434 void rdadds(rdstring *rs, wchar_t const *p); 435 wchar_t *rdtrim(rdstring *rs); 436 void rdaddc(rdstringc *rs, char c); 437 void rdaddsc(rdstringc *rs, char const *p); 438 void rdaddsn(rdstringc *rc, char const *p, int len); 439 char *rdtrimc(rdstringc *rs); 440 441 int compare_wordlists(word *a, word *b); 442 443 void mark_attr_ends(word *words); 444 445 typedef struct tagWrappedLine wrappedline; 446 struct tagWrappedLine { 447 wrappedline *next; 448 word *begin, *end; /* first & last words of line */ 449 int nspaces; /* number of whitespaces in line */ 450 int shortfall; /* how much shorter than max width */ 451 }; 452 wrappedline *wrap_para(word *, int, int, int (*)(void *, word *), void *, int); 453 void wrap_free(wrappedline *); 454 void cmdline_cfg_add(paragraph *cfg, char *string); 455 paragraph *cmdline_cfg_new(void); 456 paragraph *cmdline_cfg_simple(char *string, ...); 457 458 /* 459 * input.c 460 */ 461 paragraph *read_input(input *in, indexdata *idx); 462 463 /* 464 * in_afm.c 465 */ 466 void read_afm_file(input *in); 467 468 /* 469 * in_pf.c 470 */ 471 void read_pfa_file(input *in); 472 void read_pfb_file(input *in); 473 474 /* 475 * in_sfnt.c 476 */ 477 void read_sfnt_file(input *in); 478 479 /* 480 * keywords.c 481 */ 482 struct keywordlist_Tag { 483 int nkeywords; 484 int size; 485 tree234 *keys; /* sorted by `key' field */ 486 word **looseends; /* non-keyword list element numbers */ 487 int nlooseends; 488 int looseendssize; 489 }; 490 struct keyword_Tag { 491 wchar_t *key; /* the keyword itself */ 492 word *text; /* "Chapter 2", "Appendix Q"... */ 493 /* (NB: filepos are not set) */ 494 paragraph *para; /* the paragraph referenced */ 495 }; 496 keyword *kw_lookup(keywordlist *, wchar_t *); 497 keywordlist *get_keywords(paragraph *); 498 void free_keywords(keywordlist *); 499 void subst_keywords(paragraph *, keywordlist *); 500 501 /* 502 * index.c 503 */ 504 505 /* 506 * Data structure to hold both sides of the index. 507 */ 508 struct indexdata_Tag { 509 tree234 *tags; /* holds type `indextag' */ 510 tree234 *entries; /* holds type `indexentry' */ 511 }; 512 513 /* 514 * Data structure to hold an index tag (LHS of index). 515 */ 516 struct indextag_Tag { 517 wchar_t *name; 518 word *implicit_text; 519 filepos implicit_fpos; 520 word **explicit_texts; 521 filepos *explicit_fpos; 522 int nexplicit, explicit_size; 523 int nrefs; 524 indexentry **refs; /* array of entries referenced by tag */ 525 }; 526 527 /* 528 * Data structure to hold an index entry (RHS of index). 529 */ 530 struct indexentry_Tag { 531 word *text; 532 void *backend_data; /* private to back end */ 533 filepos fpos; 534 }; 535 536 indexdata *make_index(void); 537 void cleanup_index(indexdata *); 538 /* index_merge takes responsibility for freeing arg 3 iff implicit; never 539 * takes responsibility for arg 2 */ 540 void index_merge(indexdata *, int is_explicit, wchar_t *, word *, filepos *); 541 void build_index(indexdata *); 542 void index_debug(indexdata *); 543 indextag *index_findtag(indexdata *idx, wchar_t *name); 544 545 /* 546 * contents.c 547 */ 548 numberstate *number_init(void); 549 void number_cfg(numberstate *, paragraph *); 550 word *number_mktext(numberstate *, paragraph *, wchar_t *, int *, int *); 551 void number_free(numberstate *); 552 553 /* 554 * biblio.c 555 */ 556 void gen_citations(paragraph *, keywordlist *); 557 558 /* 559 * bk_text.c 560 */ 561 void text_backend(paragraph *, keywordlist *, indexdata *, void *); 562 paragraph *text_config_filename(char *filename); 563 564 /* 565 * bk_html.c 566 */ 567 void html_backend(paragraph *, keywordlist *, indexdata *, void *); 568 void chm_backend(paragraph *, keywordlist *, indexdata *, void *); 569 paragraph *html_config_filename(char *filename); 570 paragraph *chm_config_filename(char *filename); 571 572 /* 573 * bk_whlp.c 574 */ 575 void whlp_backend(paragraph *, keywordlist *, indexdata *, void *); 576 paragraph *whlp_config_filename(char *filename); 577 578 /* 579 * bk_man.c 580 */ 581 void man_backend(paragraph *, keywordlist *, indexdata *, void *); 582 paragraph *man_config_filename(char *filename); 583 584 /* 585 * bk_info.c 586 */ 587 void info_backend(paragraph *, keywordlist *, indexdata *, void *); 588 paragraph *info_config_filename(char *filename); 589 590 /* 591 * bk_paper.c 592 */ 593 void *paper_pre_backend(paragraph *, keywordlist *, indexdata *); 594 void listfonts(void); 595 596 /* 597 * bk_ps.c 598 */ 599 void ps_backend(paragraph *, keywordlist *, indexdata *, void *); 600 paragraph *ps_config_filename(char *filename); 601 602 /* 603 * bk_pdf.c 604 */ 605 void pdf_backend(paragraph *, keywordlist *, indexdata *, void *); 606 paragraph *pdf_config_filename(char *filename); 607 608 #endif 609