/*	$Id: mandoc.h,v 1.152 2014/08/06 15:09:05 schwarze Exp $ */
/*
 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#ifndef MANDOC_H
#define MANDOC_H

/*
 * Public declarations: status and error codes, the tbl and eqn data
 * structures, parse options, escape classification, and the function
 * prototypes at the bottom of the file.
 *
 * This header includes no other header.  It uses size_t, INT_MIN
 * (through EQN_DEFSIZE) and __BEGIN_DECLS/__END_DECLS, so whatever
 * provides those names must be included before this file.
 */

/* Character codes with a special meaning, as described per macro. */
#define ASCII_NBRSP	 31  /* non-breaking space */
#define ASCII_HYPH	 30  /* breakable hyphen */
#define ASCII_BREAK	 29  /* breakable zero-width space */

/*
 * Status level.  This refers to both internal status (i.e., whilst
 * running, when warnings/errors are reported) and an indicator of a
 * threshold of when to halt (when said internal state exceeds the
 * threshold).
 * Levels are listed in increasing numeric order; MANDOCLEVEL_MAX is
 * the number of levels, not a level itself.
 */
enum	mandoclevel {
	MANDOCLEVEL_OK = 0,
	MANDOCLEVEL_RESERVED,
	MANDOCLEVEL_WARNING, /* warnings: syntax, whitespace, etc. */
	MANDOCLEVEL_ERROR, /* input has been thrown away */
	MANDOCLEVEL_FATAL, /* input is borked */
	MANDOCLEVEL_BADARG, /* bad argument in invocation */
	MANDOCLEVEL_SYSERR, /* system error */
	MANDOCLEVEL_MAX
};

/*
 * All possible things that can go wrong within a parse, be it libroff,
 * libmdoc, or libman.
 * MANDOCERR_WARNING, MANDOCERR_ERROR and MANDOCERR_FATAL are markers
 * that open the warning, error and fatal ranges; they are not messages
 * themselves.  The numeric position of an enumerator relative to these
 * markers therefore carries meaning, so new entries belong inside the
 * matching range.
 * NOTE(review): presumably the message strings returned by
 * mparse_strerror() are indexed by these values -- confirm before
 * inserting or reordering entries.
 */
enum	mandocerr {
	MANDOCERR_OK,

	MANDOCERR_WARNING, /* ===== start of warnings ===== */

	/* related to the prologue */
	MANDOCERR_DT_NOTITLE, /* missing manual title, using UNTITLED: line */
	MANDOCERR_TH_NOTITLE, /* missing manual title, using "": [macro] */
	MANDOCERR_TITLE_CASE, /* lower case character in document title */
	MANDOCERR_MSEC_MISSING, /* missing manual section, using "": macro */
	MANDOCERR_MSEC_BAD, /* unknown manual section: Dt ... section */
	MANDOCERR_ARCH_BAD, /* unknown manual volume or arch: Dt ... volume */
	MANDOCERR_DATE_MISSING, /* missing date, using today's date */
	MANDOCERR_DATE_BAD, /* cannot parse date, using it verbatim: date */
	MANDOCERR_OS_MISSING, /* missing Os macro, using "" */
	MANDOCERR_PROLOG_REP, /* duplicate prologue macro: macro */
	MANDOCERR_PROLOG_LATE, /* late prologue macro: macro */
	MANDOCERR_DT_LATE, /* skipping late title macro: Dt args */
	MANDOCERR_PROLOG_ORDER, /* prologue macros out of order: macros */

	/* related to document structure */
	MANDOCERR_SO, /* .so is fragile, better use ln(1): so path */
	MANDOCERR_DOC_EMPTY, /* no document body */
	MANDOCERR_SEC_BEFORE, /* content before first section header: macro */
	MANDOCERR_NAMESEC_FIRST, /* first section is not NAME: Sh title */
	MANDOCERR_NAMESEC_BAD, /* bad NAME section contents: macro */
	MANDOCERR_SEC_ORDER, /* sections out of conventional order: Sh title */
	MANDOCERR_SEC_REP, /* duplicate section title: Sh title */
	MANDOCERR_SEC_MSEC, /* unexpected section: Sh title for ... only */

	/* related to macros and nesting */
	MANDOCERR_MACRO_OBS, /* obsolete macro: macro */
	MANDOCERR_PAR_SKIP, /* skipping paragraph macro: macro ... */
	MANDOCERR_PAR_MOVE, /* moving paragraph macro out of list: macro */
	MANDOCERR_NS_SKIP, /* skipping no-space macro */
	MANDOCERR_BLK_NEST, /* blocks badly nested: macro ... */
	MANDOCERR_BD_NEST, /* nested displays are not portable: macro ... */
	MANDOCERR_BL_MOVE, /* moving content out of list: macro */
	MANDOCERR_VT_CHILD, /* .Vt block has child macro: macro */
	MANDOCERR_FI_SKIP, /* fill mode already enabled, skipping: fi */
	MANDOCERR_NF_SKIP, /* fill mode already disabled, skipping: nf */
	MANDOCERR_BLK_LINE, /* line scope broken: macro breaks macro */

	/* related to missing arguments */
	MANDOCERR_REQ_EMPTY, /* skipping empty request: request */
	MANDOCERR_COND_EMPTY, /* conditional request controls empty scope */
	MANDOCERR_MACRO_EMPTY, /* skipping empty macro: macro */
	MANDOCERR_ARG_EMPTY, /* empty argument, using 0n: macro arg */
	MANDOCERR_ARGCWARN, /* argument count wrong */
	MANDOCERR_BD_NOTYPE, /* missing display type, using -ragged: Bd */
	MANDOCERR_BL_LATETYPE, /* list type is not the first argument: Bl arg */
	MANDOCERR_BL_NOWIDTH, /* missing -width in -tag list, using 8n */
	MANDOCERR_EX_NONAME, /* missing utility name, using "": Ex */
	MANDOCERR_IT_NOHEAD, /* empty head in list item: Bl -type It */
	MANDOCERR_IT_NOBODY, /* empty list item: Bl -type It */
	MANDOCERR_BF_NOFONT, /* missing font type, using \fR: Bf */
	MANDOCERR_BF_BADFONT, /* unknown font type, using \fR: Bf font */
	MANDOCERR_ARG_STD, /* missing -std argument, adding it: macro */

	/* related to bad arguments */
	MANDOCERR_ARG_QUOTE, /* unterminated quoted argument */
	MANDOCERR_ARG_REP, /* duplicate argument: macro arg */
	MANDOCERR_AN_REP, /* skipping duplicate argument: An -arg */
	MANDOCERR_BD_REP, /* skipping duplicate display type: Bd -type */
	MANDOCERR_BL_REP, /* skipping duplicate list type: Bl -type */
	MANDOCERR_BL_SKIPW, /* skipping -width argument: Bl -type */
	MANDOCERR_AT_BAD, /* unknown AT&T UNIX version: At version */
	MANDOCERR_RS_BAD, /* invalid content in Rs block: macro */
	MANDOCERR_SM_BAD, /* invalid Boolean argument: macro arg */
	MANDOCERR_FT_BAD, /* unknown font, skipping request: ft font */

	/* related to plain text */
	MANDOCERR_FI_BLANK, /* blank line in fill mode, using .sp */
	MANDOCERR_FI_TAB, /* tab in filled text */
	MANDOCERR_SPACE_EOL, /* whitespace at end of input line */
	MANDOCERR_COMMENT_BAD, /* bad comment style */
	MANDOCERR_ESC_BAD, /* invalid escape sequence: esc */
	MANDOCERR_STR_UNDEF, /* undefined string, using "": name */

	MANDOCERR_ERROR, /* ===== start of errors ===== */

	/* related to equations */
	MANDOCERR_EQNNSCOPE, /* unexpected equation scope closure */
	MANDOCERR_EQNSCOPE, /* equation scope open on exit */
	MANDOCERR_EQNBADSCOPE, /* overlapping equation scopes */
	MANDOCERR_EQNEOF, /* unexpected end of equation */
	MANDOCERR_EQNSYNT, /* equation syntax error */

	/* related to tables */
	MANDOCERR_TBL, /* bad table syntax */
	MANDOCERR_TBLOPT, /* bad table option */
	MANDOCERR_TBLLAYOUT, /* bad table layout */
	MANDOCERR_TBLNOLAYOUT, /* no table layout cells specified */
	MANDOCERR_TBLNODATA, /* no table data cells specified */
	MANDOCERR_TBLIGNDATA, /* ignore data in cell */
	MANDOCERR_TBLBLOCK, /* data block still open */
	MANDOCERR_TBLEXTRADAT, /* ignoring extra data cells */

	/* related to document structure and macros */
	MANDOCERR_ROFFLOOP, /* input stack limit exceeded, infinite loop? */
	MANDOCERR_BADCHAR, /* skipping bad character: number */
	MANDOCERR_MACRO, /* skipping unknown macro: macro */
	MANDOCERR_IT_STRAY, /* skipping item outside list: It ... */
	MANDOCERR_TA_STRAY, /* skipping column outside column list: Ta */
	MANDOCERR_BLK_NOTOPEN, /* skipping end of block that is not open */
	MANDOCERR_BLK_BROKEN, /* inserting missing end of block: macro ... */
	MANDOCERR_BLK_NOEND, /* appending missing end of block: macro */

	/* related to request and macro arguments */
	MANDOCERR_NAMESC, /* escaped character not allowed in a name: name */
	MANDOCERR_ARGCOUNT, /* argument count wrong */
	MANDOCERR_BL_NOTYPE, /* missing list type, using -item: Bl */
	MANDOCERR_NM_NONAME, /* missing manual name, using "": Nm */
	MANDOCERR_OS_UNAME, /* uname(3) system call failed, using UNKNOWN */
	MANDOCERR_ST_BAD, /* unknown standard specifier: St standard */
	MANDOCERR_IT_NONUM, /* skipping request without numeric argument */
	MANDOCERR_ARG_SKIP, /* skipping all arguments: macro args */
	MANDOCERR_ARG_EXCESS, /* skipping excess arguments: macro ... args */

	MANDOCERR_FATAL, /* ===== start of fatal errors ===== */

	MANDOCERR_TOOLARGE, /* input too large */
	MANDOCERR_BD_FILE, /* NOT IMPLEMENTED: Bd -file */
	MANDOCERR_SO_PATH, /* NOT IMPLEMENTED: .so with absolute path or ".." */
	MANDOCERR_SO_FAIL, /* .so request failed */

	/* ===== system errors ===== */

	MANDOCERR_SYSOPEN, /* cannot open file */
	MANDOCERR_SYSSTAT, /* cannot stat file */
	MANDOCERR_SYSREAD, /* cannot read file */

	MANDOCERR_MAX
};

/*
 * Settings of one table.  The TBL_OPT_* macros are distinct single-bit
 * values; presumably they are OR'ed together into "opts" -- confirm
 * against the tbl option parser.
 */
struct	tbl_opts {
	char		  tab; /* cell-separator */
	char		  decimal; /* decimal point */
	int		  linesize;
	int		  opts;
#define	TBL_OPT_CENTRE	 (1 << 0)
#define	TBL_OPT_EXPAND	 (1 << 1)
#define	TBL_OPT_BOX	 (1 << 2)
#define	TBL_OPT_DBOX	 (1 << 3)
#define	TBL_OPT_ALLBOX	 (1 << 4)
#define	TBL_OPT_NOKEEP	 (1 << 5)
#define	TBL_OPT_NOSPACE	 (1 << 6)
	int		  cols; /* number of columns */
};

/*
 * The head of a table specifies all of its columns.  When formatting a
 * tbl_span, iterate over these and plug in data from the tbl_span when
 * appropriate, using tbl_cell as a guide to placement.
 */
struct	tbl_head {
	int		  ident; /* 0 <= unique id < cols */
	int		  vert; /* width of preceding vertical line */
	struct tbl_head	 *next;
	struct tbl_head	 *prev;
};

/*
 * Kind of a layout cell; the comments give the layout key characters.
 * TBL_CELL_MAX is the number of kinds, not a kind itself.
 */
enum	tbl_cellt {
	TBL_CELL_CENTRE, /* c, C */
	TBL_CELL_RIGHT, /* r, R */
	TBL_CELL_LEFT, /* l, L */
	TBL_CELL_NUMBER, /* n, N */
	TBL_CELL_SPAN, /* s, S */
	TBL_CELL_LONG, /* a, A */
	TBL_CELL_DOWN, /* ^ */
	TBL_CELL_HORIZ, /* _, - */
	TBL_CELL_DHORIZ, /* = */
	TBL_CELL_MAX
};

/*
 * A cell in a layout row.
 * The flag macros below are distinct single-bit values (modifier
 * characters in the comments); presumably they are OR'ed into "flags".
 * They share the TBL_CELL_ prefix with enum tbl_cellt but are unrelated
 * to the values stored in "pos".
 */
struct	tbl_cell {
	struct tbl_cell	 *next;
	int		  vert; /* width of preceding vertical line */
	enum tbl_cellt	  pos;
	size_t		  spacing;
	int		  flags;
#define	TBL_CELL_TALIGN	 (1 << 0) /* t, T */
#define	TBL_CELL_BALIGN	 (1 << 1) /* d, D */
#define	TBL_CELL_BOLD	 (1 << 2) /* fB, B, b */
#define	TBL_CELL_ITALIC	 (1 << 3) /* fI, I, i */
#define	TBL_CELL_EQUAL	 (1 << 4) /* e, E */
#define	TBL_CELL_UP	 (1 << 5) /* u, U */
#define	TBL_CELL_WIGN	 (1 << 6) /* z, Z */
	struct tbl_head	 *head;
};

/*
 * A layout row.
 */
struct	tbl_row {
	struct tbl_row	 *next;
	struct tbl_cell	 *first;
	struct tbl_cell	 *last;
	int		  vert; /* trailing vertical line */
};

/* What a data cell holds. */
enum	tbl_datt {
	TBL_DATA_NONE, /* has no data */
	TBL_DATA_DATA, /* consists of data/string */
	TBL_DATA_HORIZ, /* horizontal line */
	TBL_DATA_DHORIZ, /* double-horizontal line */
	TBL_DATA_NHORIZ, /* squeezed horizontal line */
	TBL_DATA_NDHORIZ /* squeezed double-horizontal line */
};

/*
 * A cell within a row of data.  The "string" field contains the actual
 * string value that's in the cell.  The rest is layout.
 */
struct	tbl_dat {
	struct tbl_cell	 *layout; /* layout cell */
	int		  spans; /* how many spans follow */
	struct tbl_dat	 *next;
	char		 *string; /* data (NULL if not TBL_DATA_DATA) */
	enum tbl_datt	  pos;
};

/* What a whole row (span) of a table consists of. */
enum	tbl_spant {
	TBL_SPAN_DATA, /* span consists of data */
	TBL_SPAN_HORIZ, /* span is horizontal line */
	TBL_SPAN_DHORIZ /* span is double horizontal line */
};

/*
 * A row of data in a table.
 * TBL_SPAN_FIRST and TBL_SPAN_LAST are single-bit values, presumably
 * for "flags"; they are unrelated to enum tbl_spant despite the shared
 * prefix.
 */
struct	tbl_span {
	struct tbl_opts	 *opts;
	struct tbl_head	 *head;
	struct tbl_row	 *layout; /* layout row */
	struct tbl_dat	 *first;
	struct tbl_dat	 *last;
	int		  line; /* parse line */
	int		  flags;
#define	TBL_SPAN_FIRST	 (1 << 0)
#define	TBL_SPAN_LAST	 (1 << 1)
	enum tbl_spant	  pos;
	struct tbl_span	 *next;
};

/* Node type of an eqn_box. */
enum	eqn_boxt {
	EQN_ROOT, /* root of parse tree */
	EQN_TEXT, /* text (number, variable, whatever) */
	EQN_SUBEXPR, /* nested `eqn' subexpression */
	EQN_LIST, /* subexpressions list */
	EQN_MATRIX /* matrix subexpression */
};

/* Mark attached to a box; EQNMARK__MAX counts the values. */
enum	eqn_markt {
	EQNMARK_NONE = 0,
	EQNMARK_DOT,
	EQNMARK_DOTDOT,
	EQNMARK_HAT,
	EQNMARK_TILDE,
	EQNMARK_VEC,
	EQNMARK_DYAD,
	EQNMARK_BAR,
	EQNMARK_UNDER,
	EQNMARK__MAX
};

/* Font of a box; EQNFONT__MAX counts the values. */
enum	eqn_fontt {
	EQNFONT_NONE = 0,
	EQNFONT_ROMAN,
	EQNFONT_BOLD,
	EQNFONT_FAT,
	EQNFONT_ITALIC,
	EQNFONT__MAX
};

/* Position of the next box; EQNPOS__MAX counts the values. */
enum	eqn_post {
	EQNPOS_NONE = 0,
	EQNPOS_OVER,
	EQNPOS_SUP,
	EQNPOS_SUB,
	EQNPOS_TO,
	EQNPOS_FROM,
	EQNPOS__MAX
};

/* Piling of a box; EQNPILE__MAX counts the values. */
enum	eqn_pilet {
	EQNPILE_NONE = 0,
	EQNPILE_PILE,
	EQNPILE_CPILE,
	EQNPILE_RPILE,
	EQNPILE_LPILE,
	EQNPILE_COL,
	EQNPILE_CCOL,
	EQNPILE_RCOL,
	EQNPILE_LCOL,
	EQNPILE__MAX
};

/*
 * A "box" is a parsed mathematical expression as defined by the eqn.7
 * grammar.
 * EQN_DEFSIZE expands to INT_MIN, which this header does not define:
 * code using the macro needs <limits.h> in scope.
 */
struct	eqn_box {
	int		  size; /* font size of expression */
#define	EQN_DEFSIZE	  INT_MIN
	enum eqn_boxt	  type; /* type of node */
	struct eqn_box	 *first; /* first child node */
	struct eqn_box	 *last; /* last child node */
	struct eqn_box	 *next; /* node sibling */
	struct eqn_box	 *parent; /* node parent */
	char		 *text; /* text (or NULL) */
	char		 *left;
	char		 *right;
	enum eqn_post	  pos; /* position of next box */
	enum eqn_markt	  mark; /* a mark about the box */
	enum eqn_fontt	  font; /* font of box */
	enum eqn_pilet	  pile; /* equation piling */
};

/*
 * An equation consists of a tree of expressions starting at a given
 * line and position.
 */
struct	eqn {
	char		 *name; /* identifier (or NULL) */
	struct eqn_box	 *root; /* root mathematical expression */
	int		  ln; /* invocation line */
	int		  pos; /* invocation position */
};

/*
 * Parse options.
 * The values are distinct single bits, so they can be combined.
 * NOTE(review): presumably passed as the int argument of
 * mparse_alloc() -- confirm against its definition.
 */
#define	MPARSE_MDOC	  1  /* assume -mdoc */
#define	MPARSE_MAN	  2  /* assume -man */
#define	MPARSE_SO	  4  /* honour .so requests */
#define	MPARSE_QUICK	  8  /* abort the parse early */

/*
 * Classification of an escape sequence, as returned by mandoc_escape().
 * ESCAPE_ERROR is explicitly 0.
 */
enum	mandoc_esc {
	ESCAPE_ERROR = 0, /* bail! unparsable escape */
	ESCAPE_IGNORE, /* escape to be ignored */
	ESCAPE_SPECIAL, /* a regular special character */
	ESCAPE_FONT, /* a generic font mode */
	ESCAPE_FONTBOLD, /* bold font mode */
	ESCAPE_FONTITALIC, /* italic font mode */
	ESCAPE_FONTBI, /* bold italic font mode */
	ESCAPE_FONTROMAN, /* roman font mode */
	ESCAPE_FONTPREV, /* previous font mode */
	ESCAPE_NUMBERED, /* a numbered glyph */
	ESCAPE_UNICODE, /* a unicode codepoint */
	ESCAPE_NOSPACE, /* suppress space if the last on a line */
	ESCAPE_SKIPCHAR /* skip the next character */
};

/*
 * Message callback type, handed to mparse_alloc().  It receives the
 * error code and its level.
 * NOTE(review): the remaining arguments are unnamed here; presumably
 * file name, line, column and message text -- confirm against the
 * callers.
 */
typedef	void	(*mandocmsg)(enum mandocerr, enum mandoclevel,
			const char *, int, int, const char *);

/* Opaque to users of this header; only pointers to them are used here. */
struct	mparse;
struct	mchars;
struct	mdoc;
struct	man;

/*
 * Function prototypes.  Parameters are unnamed, following the rest of
 * this file; see the definitions for the meaning of each argument.
 * __BEGIN_DECLS and __END_DECLS are not defined by this header.
 */
__BEGIN_DECLS

enum mandoc_esc	  mandoc_escape(const char **, const char **, int *);
struct mchars	 *mchars_alloc(void);
void		  mchars_free(struct mchars *);
char		  mchars_num2char(const char *, size_t);
int		  mchars_num2uc(const char *, size_t);
int		  mchars_spec2cp(const struct mchars *,
			const char *, size_t);
const char	 *mchars_spec2str(const struct mchars *,
			const char *, size_t, size_t *);
struct mparse	 *mparse_alloc(int, enum mandoclevel, mandocmsg,
			const char *);
void		  mparse_free(struct mparse *);
void		  mparse_keep(struct mparse *);
enum mandoclevel  mparse_readfd(struct mparse *, int, const char *);
enum mandoclevel  mparse_readmem(struct mparse *, const void *, size_t,
			const char *);
void		  mparse_reset(struct mparse *);
void		  mparse_result(struct mparse *,
			struct mdoc **, struct man **, char **);
const char	 *mparse_getkeep(const struct mparse *);
const char	 *mparse_strerror(enum mandocerr);
const char	 *mparse_strlevel(enum mandoclevel);

__END_DECLS

#endif /*!MANDOC_H*/