xref: /minix/external/bsd/mdocml/dist/mandoc.3 (revision 0a6a1f1d)
1.\"	Id: mandoc.3,v 1.22 2013/10/06 17:01:52 schwarze Exp
2.\"
3.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4.\" Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5.\"
6.\" Permission to use, copy, modify, and distribute this software for any
7.\" purpose with or without fee is hereby granted, provided that the above
8.\" copyright notice and this permission notice appear in all copies.
9.\"
10.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17.\"
18.Dd October 6, 2013
19.Dt MANDOC 3
20.Os
21.Sh NAME
22.Nm mandoc ,
23.Nm mandoc_escape ,
24.Nm man_meta ,
25.Nm man_mparse ,
26.Nm man_node ,
27.Nm mchars_alloc ,
28.Nm mchars_free ,
29.Nm mchars_num2char ,
30.Nm mchars_num2uc ,
31.Nm mchars_spec2cp ,
32.Nm mchars_spec2str ,
33.Nm mdoc_meta ,
34.Nm mdoc_node ,
35.Nm mparse_alloc ,
36.Nm mparse_free ,
37.Nm mparse_getkeep ,
38.Nm mparse_keep ,
39.Nm mparse_readfd ,
40.Nm mparse_reset ,
41.Nm mparse_result ,
42.Nm mparse_strerror ,
43.Nm mparse_strlevel
44.Nd mandoc macro compiler library
45.Sh LIBRARY
46.Lb libmandoc
47.Sh SYNOPSIS
48.In man.h
49.In mdoc.h
50.In mandoc.h
51.Ft "enum mandoc_esc"
52.Fo mandoc_escape
53.Fa "const char **end"
54.Fa "const char **start"
55.Fa "int *sz"
56.Fc
57.Ft "const struct man_meta *"
58.Fo man_meta
59.Fa "const struct man *man"
60.Fc
61.Ft "const struct mparse *"
62.Fo man_mparse
63.Fa "const struct man *man"
64.Fc
65.Ft "const struct man_node *"
66.Fo man_node
67.Fa "const struct man *man"
68.Fc
69.Ft "struct mchars *"
70.Fn mchars_alloc "void"
71.Ft void
72.Fn mchars_free "struct mchars *p"
73.Ft char
74.Fn mchars_num2char "const char *cp" "size_t sz"
75.Ft int
76.Fn mchars_num2uc "const char *cp" "size_t sz"
77.Ft "const char *"
78.Fo mchars_spec2str
79.Fa "const struct mchars *p"
80.Fa "const char *cp"
81.Fa "size_t sz"
82.Fa "size_t *rsz"
83.Fc
84.Ft int
85.Fo mchars_spec2cp
86.Fa "const struct mchars *p"
87.Fa "const char *cp"
88.Fa "size_t sz"
89.Fc
90.Ft "const struct mdoc_meta *"
91.Fo mdoc_meta
92.Fa "const struct mdoc *mdoc"
93.Fc
94.Ft "const struct mdoc_node *"
95.Fo mdoc_node
96.Fa "const struct mdoc *mdoc"
97.Fc
98.Ft "struct mparse *"
99.Fo mparse_alloc
100.Fa "enum mparset type"
101.Fa "enum mandoclevel wlevel"
102.Fa "mandocmsg msg"
103.Fa "void *msgarg"
104.Fc
105.Ft void
106.Fo mparse_free
107.Fa "struct mparse *parse"
108.Fc
109.Ft "const char *"
110.Fo mparse_getkeep
111.Fa "const struct mparse *parse"
112.Fc
113.Ft void
114.Fo mparse_keep
115.Fa "struct mparse *parse"
116.Fc
117.Ft "enum mandoclevel"
118.Fo mparse_readfd
119.Fa "struct mparse *parse"
120.Fa "int fd"
121.Fa "const char *fname"
122.Fc
123.Ft void
124.Fo mparse_reset
125.Fa "struct mparse *parse"
126.Fc
127.Ft void
128.Fo mparse_result
129.Fa "struct mparse *parse"
130.Fa "struct mdoc **mdoc"
131.Fa "struct man **man"
132.Fc
133.Ft "const char *"
134.Fo mparse_strerror
135.Fa "enum mandocerr"
136.Fc
137.Ft "const char *"
138.Fo mparse_strlevel
139.Fa "enum mandoclevel"
140.Fc
141.Vt extern const char * const * man_macronames;
142.Vt extern const char * const * mdoc_argnames;
143.Vt extern const char * const * mdoc_macronames;
144.Fd "#define ASCII_NBRSP"
145.Fd "#define ASCII_HYPH"
146.Sh DESCRIPTION
147The
148.Nm mandoc
149library parses a
150.Ux
151manual into an abstract syntax tree (AST).
152.Ux
153manuals are composed of
154.Xr mdoc 7
155or
156.Xr man 7 ,
157and may be mixed with
158.Xr roff 7 ,
159.Xr tbl 7 ,
160and
161.Xr eqn 7
162invocations.
163.Pp
164The following describes a general parse sequence:
165.Bl -enum
166.It
167initiate a parsing sequence with
168.Fn mparse_alloc ;
169.It
170parse files or file descriptors with
171.Fn mparse_readfd ;
172.It
173retrieve a parsed syntax tree, if the parse was successful, with
174.Fn mparse_result ;
175.It
176iterate over parse nodes with
177.Fn mdoc_node
178or
179.Fn man_node ;
180.It
181free all allocated memory with
182.Fn mparse_free ,
183or invoke
184.Fn mparse_reset
185and parse new files.
186.El
187.Pp
188The
189.Nm
190library also contains routines for translating character strings into glyphs
191.Pq see Fn mchars_alloc
192and parsing escape sequences from strings
193.Pq see Fn mandoc_escape .
194.Sh REFERENCE
195This section documents the functions, types, and variables available
196via
197.In mandoc.h .
198.Ss Types
199.Bl -ohang
200.It Vt "enum mandoc_esc"
201An escape sequence classification.
202.It Vt "enum mandocerr"
203A fatal error, error, or warning message during parsing.
204.It Vt "enum mandoclevel"
205A classification of an
206.Vt "enum mandocerr"
207as regards system operation.
208.It Vt "struct mchars"
209An opaque pointer to an object allowing for translation between
210character strings and glyphs.
211See
212.Fn mchars_alloc .
213.It Vt "enum mparset"
214The type of parser when reading input.
215This should usually be
216.Dv MPARSE_AUTO
217for auto-detection.
218.It Vt "struct mparse"
219An opaque pointer to a running parse sequence.
220Created with
221.Fn mparse_alloc
222and freed with
223.Fn mparse_free .
224This may be used across parsed input if
225.Fn mparse_reset
226is called between parses.
227.It Vt "mandocmsg"
228A prototype for a function to handle fatal error, error, and warning
229messages emitted by the parser.
230.El
231.Ss Functions
232.Bl -ohang
233.It Fn mandoc_escape
234Scan an escape sequence, i.e., a character string beginning with
235.Sq \e .
236Pass a pointer to the character after the
237.Sq \e
238as
239.Va end ;
240it will be set to the supremum of the parsed escape sequence unless
241returning
242.Dv ESCAPE_ERROR ,
243in which case the string is bogus and should be
244thrown away.
245If not
246.Dv ESCAPE_ERROR
247or
248.Dv ESCAPE_IGNORE ,
249.Va start
250is set to the first relevant character of the substring (font, glyph,
251whatever) of length
252.Va sz .
253Both
254.Va start
255and
256.Va sz
257may be
258.Dv NULL .
259Declared in
260.In mandoc.h ,
261implemented in
262.Pa mandoc.c .
263.It Fn man_meta
264Obtain the meta-data of a successful parse.
265This may only be used on a pointer returned by
266.Fn mparse_result .
267Declared in
268.In man.h ,
269implemented in
270.Pa man.c .
271.It Fn man_mparse
272Get the parser used for the current output.
273Declared in
274.In man.h ,
275implemented in
276.Pa man.c .
277.It Fn man_node
278Obtain the root node of a successful parse.
279This may only be used on a pointer returned by
280.Fn mparse_result .
281Declared in
282.In man.h ,
283implemented in
284.Pa man.c .
285.It Fn mchars_alloc
286Allocate an
287.Vt "struct mchars *"
288object for translating special characters into glyphs.
289See
290.Xr mandoc_char 7
291for an overview of special characters.
292The object must be freed with
293.Fn mchars_free .
294Declared in
295.In mandoc.h ,
296implemented in
297.Pa chars.c .
298.It Fn mchars_free
299Free an object created with
300.Fn mchars_alloc .
301Declared in
302.In mandoc.h ,
303implemented in
304.Pa chars.c .
305.It Fn mchars_num2char
306Convert a character index (e.g., the \eN\(aq\(aq escape) into a
307printable ASCII character.
308Returns \e0 (the nil character) if the input sequence is malformed.
309Declared in
310.In mandoc.h ,
311implemented in
312.Pa chars.c .
313.It Fn mchars_num2uc
314Convert a hexadecimal character index (e.g., the \e[uNNNN] escape) into
315a Unicode codepoint.
316Returns \e0 (the nil character) if the input sequence is malformed.
317Declared in
318.In mandoc.h ,
319implemented in
320.Pa chars.c .
321.It Fn mchars_spec2cp
322Convert a special character into a valid Unicode codepoint.
323Returns \-1 on failure or a non-zero Unicode codepoint on success.
324Declared in
325.In mandoc.h ,
326implemented in
327.Pa chars.c .
328.It Fn mchars_spec2str
329Convert a special character into an ASCII string.
330Returns
331.Dv NULL
332on failure.
333Declared in
334.In mandoc.h ,
335implemented in
336.Pa chars.c .
337.It Fn mdoc_meta
338Obtain the meta-data of a successful parse.
339This may only be used on a pointer returned by
340.Fn mparse_result .
341Declared in
342.In mdoc.h ,
343implemented in
344.Pa mdoc.c .
345.It Fn mdoc_node
346Obtain the root node of a successful parse.
347This may only be used on a pointer returned by
348.Fn mparse_result .
349Declared in
350.In mdoc.h ,
351implemented in
352.Pa mdoc.c .
353.It Fn mparse_alloc
354Allocate a parser.
355The same parser may be used for multiple files so long as
356.Fn mparse_reset
357is called between parses.
358.Fn mparse_free
359must be called to free the memory allocated by this function.
360Declared in
361.In mandoc.h ,
362implemented in
363.Pa read.c .
364.It Fn mparse_free
365Free all memory allocated by
366.Fn mparse_alloc .
367Declared in
368.In mandoc.h ,
369implemented in
370.Pa read.c .
371.It Fn mparse_getkeep
372Acquire the keep buffer.
373Must follow a call of
374.Fn mparse_keep .
375Declared in
376.In mandoc.h ,
377implemented in
378.Pa read.c .
379.It Fn mparse_keep
380Instruct the parser to retain a copy of its parsed input.
381This can be acquired with subsequent
382.Fn mparse_getkeep
383calls.
384Declared in
385.In mandoc.h ,
386implemented in
387.Pa read.c .
388.It Fn mparse_readfd
389Parse a file or file descriptor.
390If
391.Va fd
392is \-1,
393.Va fname
394is opened for reading.
395Otherwise,
396.Va fname
397is assumed to be the name associated with
398.Va fd .
399This may be called multiple times with different parameters; however,
400.Fn mparse_reset
401should be invoked between parses.
402Declared in
403.In mandoc.h ,
404implemented in
405.Pa read.c .
406.It Fn mparse_reset
407Reset a parser so that
408.Fn mparse_readfd
409may be used again.
410Declared in
411.In mandoc.h ,
412implemented in
413.Pa read.c .
414.It Fn mparse_result
415Obtain the result of a parse.
416Only successful parses
417.Po
418i.e., those where
419.Fn mparse_readfd
420returned less than MANDOCLEVEL_FATAL
421.Pc
422should invoke this function, in which case one of the two pointers will
423be filled in.
424Declared in
425.In mandoc.h ,
426implemented in
427.Pa read.c .
428.It Fn mparse_strerror
429Return a statically-allocated string representation of an error code.
430Declared in
431.In mandoc.h ,
432implemented in
433.Pa read.c .
434.It Fn mparse_strlevel
435Return a statically-allocated string representation of a level code.
436Declared in
437.In mandoc.h ,
438implemented in
439.Pa read.c .
440.El
441.Ss Variables
442.Bl -ohang
443.It Va man_macronames
444The string representation of a man macro as indexed by
445.Vt "enum mant" .
446.It Va mdoc_argnames
447The string representation of a mdoc macro argument as indexed by
448.Vt "enum mdocargt" .
449.It Va mdoc_macronames
450The string representation of a mdoc macro as indexed by
451.Vt "enum mdoct" .
452.El
453.Sh IMPLEMENTATION NOTES
454This section consists of structural documentation for
455.Xr mdoc 7
456and
457.Xr man 7
458syntax trees and strings.
459.Ss Man and Mdoc Strings
460Strings may be extracted from mdoc and man meta-data, or from text
461nodes (MDOC_TEXT and MAN_TEXT, respectively).
462These strings have special non-printing formatting cues embedded in the
463text itself, as well as
464.Xr roff 7
465escapes preserved from input.
466Implementing systems will need to handle both situations to produce
467human-readable text.
468In general, strings may be assumed to consist of 7-bit ASCII characters.
469.Pp
470The following non-printing characters may be embedded in text strings:
471.Bl -tag -width Ds
472.It Dv ASCII_NBRSP
473A non-breaking space character.
474.It Dv ASCII_HYPH
475A soft hyphen.
476.El
477.Pp
478Escape sequences are also passed verbatim into text strings.
479An escape sequence is a sequence of characters beginning with the
480backslash
481.Pq Sq \e .
482To construct human-readable text, these should be intercepted with
483.Fn mandoc_escape
484and converted with one of
485.Fn mchars_num2char ,
486.Fn mchars_spec2str ,
487and so on.
488.Ss Man Abstract Syntax Tree
489This AST is governed by the ontological rules dictated in
490.Xr man 7
491and derives its terminology accordingly.
492.Pp
493The AST is composed of
494.Vt struct man_node
495nodes with element, root and text types as declared by the
496.Va type
497field.
498Each node also provides its parse point (the
499.Va line ,
500.Va sec ,
501and
502.Va pos
503fields), its position in the tree (the
504.Va parent ,
505.Va child ,
506.Va next
507and
508.Va prev
509fields) and some type-specific data.
510.Pp
511The tree itself is arranged according to the following normal form,
512where capitalised non-terminals represent nodes.
513.Pp
514.Bl -tag -width "ELEMENTXX" -compact
515.It ROOT
516\(<- mnode+
517.It mnode
518\(<- ELEMENT | TEXT | BLOCK
519.It BLOCK
520\(<- HEAD BODY
521.It HEAD
522\(<- mnode*
523.It BODY
524\(<- mnode*
525.It ELEMENT
526\(<- ELEMENT | TEXT*
527.It TEXT
528\(<- [[:ascii:]]*
529.El
530.Pp
531The only elements capable of nesting other elements are those with
532next-line scope as documented in
533.Xr man 7 .
534.Ss Mdoc Abstract Syntax Tree
535This AST is governed by the ontological
536rules dictated in
537.Xr mdoc 7
538and derives its terminology accordingly.
539.Qq In-line
540elements described in
541.Xr mdoc 7
542are described simply as
543.Qq elements .
544.Pp
545The AST is composed of
546.Vt struct mdoc_node
547nodes with block, head, body, element, root and text types as declared
548by the
549.Va type
550field.
551Each node also provides its parse point (the
552.Va line ,
553.Va sec ,
554and
555.Va pos
556fields), its position in the tree (the
557.Va parent ,
558.Va child ,
559.Va nchild ,
560.Va next
561and
562.Va prev
563fields) and some type-specific data, in particular, for nodes generated
564from macros, the generating macro in the
565.Va tok
566field.
567.Pp
568The tree itself is arranged according to the following normal form,
569where capitalised non-terminals represent nodes.
570.Pp
571.Bl -tag -width "ELEMENTXX" -compact
572.It ROOT
573\(<- mnode+
574.It mnode
575\(<- BLOCK | ELEMENT | TEXT
576.It BLOCK
577\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
578.It ELEMENT
579\(<- TEXT*
580.It HEAD
581\(<- mnode*
582.It BODY
583\(<- mnode* [ENDBODY mnode*]
584.It TAIL
585\(<- mnode*
586.It TEXT
587\(<- [[:ascii:]]*
588.El
589.Pp
590Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
591the BLOCK production: these refer to punctuation marks.
592Furthermore, although a TEXT node will generally have a non-zero-length
593string, in the specific case of
594.Sq \&.Bd \-literal ,
595an empty line will produce a zero-length string.
596Multiple body parts are only found in invocations of
597.Sq \&.Bl \-column ,
598where a new body introduces a new phrase.
599.Pp
600The
601.Xr mdoc 7
602syntax tree accommodates broken block structures as well.
603The ENDBODY node is available to end the formatting associated
604with a given block before the physical end of that block.
605It has a non-null
606.Va end
607field, is of the BODY
608.Va type ,
609has the same
610.Va tok
611as the BLOCK it is ending, and has a
612.Va pending
613field pointing to that BLOCK's BODY node.
614It is an indirect child of that BODY node
615and has no children of its own.
616.Pp
617An ENDBODY node is generated when a block ends while one of its child
618blocks is still open, like in the following example:
619.Bd -literal -offset indent
620\&.Ao ao
621\&.Bo bo ac
622\&.Ac bc
623\&.Bc end
624.Ed
625.Pp
626This example results in the following block structure:
627.Bd -literal -offset indent
628BLOCK Ao
629    HEAD Ao
630    BODY Ao
631        TEXT ao
632        BLOCK Bo, pending -> Ao
633            HEAD Bo
634            BODY Bo
635                TEXT bo
636                TEXT ac
637                ENDBODY Ao, pending -> Ao
638                TEXT bc
639TEXT end
640.Ed
641.Pp
642Here, the formatting of the
643.Sq \&Ao
644block extends from TEXT ao to TEXT ac,
645while the formatting of the
646.Sq \&Bo
647block extends from TEXT bo to TEXT bc.
648It renders as follows in
649.Fl T Ns Cm ascii
650mode:
651.Pp
652.Dl <ao [bo ac> bc] end
653.Pp
654Support for badly-nested blocks is only provided for backward
655compatibility with some older
656.Xr mdoc 7
657implementations.
658Using badly-nested blocks is
659.Em strongly discouraged ;
660for example, the
661.Fl T Ns Cm html
662and
663.Fl T Ns Cm xhtml
664front-ends to
665.Xr mandoc 1
666are unable to render them in any meaningful way.
667Furthermore, behaviour when encountering badly-nested blocks is not
668consistent across troff implementations, especially when using multiple
669levels of badly-nested blocks.
670.Sh SEE ALSO
671.Xr mandoc 1 ,
672.Xr eqn 7 ,
673.Xr man 7 ,
674.Xr mandoc_char 7 ,
675.Xr mdoc 7 ,
676.Xr roff 7 ,
677.Xr tbl 7
678.Sh AUTHORS
679The
680.Nm
681library was written by
682.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv .
683