1##
2##  OSSP cfg - Configuration Parsing
3##  Copyright (c) 2002-2006 Ralf S. Engelschall <rse@engelschall.com>
4##  Copyright (c) 2002-2006 The OSSP Project (http://www.ossp.org/)
5##
6##  This file is part of OSSP cfg, a configuration parsing
7##  library which can be found at http://www.ossp.org/pkg/lib/cfg/.
8##
9##  Permission to use, copy, modify, and distribute this software for
10##  any purpose with or without fee is hereby granted, provided that
11##  the above copyright notice and this permission notice appear in all
12##  copies.
13##
14##  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
15##  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
16##  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17##  IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
18##  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19##  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
20##  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
21##  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22##  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23##  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
24##  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25##  SUCH DAMAGE.
26##
27##  cfg.pod: manual page
28##
29
30=pod
31
32=head1 NAME
33
34B<OSSP cfg> - Configuration Parsing
35
36=head1 VERSION
37
38OSSP cfg CFG_VERSION_STR
39
40=head1 SYNOPSIS
41
42=over 4
43
44=item B<API Header:>
45
46cfg.h
47
48=item B<API Types:>
49
50cfg_t,
51cfg_rc_t,
52cfg_node_type_t,
53cfg_node_t,
54cfg_node_attr_t,
55cfg_fmt_t,
56cfg_data_t,
57cfg_data_ctrl_t,
58cfg_data_cb_t,
59cfg_data_attr_t
60
61=item B<API Functions:>
62
63cfg_create,
64cfg_destroy,
65cfg_error,
66cfg_version,
67cfg_import,
68cfg_export,
69cfg_node_create,
70cfg_node_destroy,
71cfg_node_clone,
72cfg_node_set,
73cfg_node_get,
74cfg_node_root,
75cfg_node_select,
76cfg_node_find,
77cfg_node_apply,
78cfg_node_cmp,
79cfg_node_link,
80cfg_node_unlink,
81cfg_data_set,
82cfg_data_get,
83cfg_data_ctrl
84
85=back
86
87=head1 DESCRIPTION
88
89B<OSSP cfg> is an ISO-C library for parsing arbitrary C/C++-style
90configuration files. A configuration is a sequence of directives. Each
91directive consists of one or more tokens. Each token can be either a
92string or again a complete sequence. This means the configuration syntax
93has a recursive structure and this way allows the creation of configurations
94with arbitrarily nested sections.
95
96Additionally the configuration syntax provides complex
97single/double/balanced quoting of tokens, hexadecimal/octal/decimal
98character encodings, character escaping, C/C++ and Shell-style comments,
99etc. The library API allows importing a configuration text into an
100Abstract Syntax Tree (AST), traversing the AST and optionally exporting
101the AST again as a configuration text.
102
103=head2 CONFIGURATION SYNTAX
104
105The configuration syntax is described by the following context-free
106(Chomsky-2) grammar:
107
108B<sequence>   ::= I<empty>
109             | B<directive>
110             | B<directive> B<SEP> B<sequence>
111
112B<directive>  ::= B<token>
113             | B<token> B<directive>
114
115B<token>      ::= B<OPEN> B<sequence> B<CLOSE>
116             | B<string>
117
118B<string>     ::= B<DQ_STRING>   # double quoted string
119             | B<SQ_STRING>   # single quoted string
120             | B<FQ_STRING>   # flexible quoted string
121             | B<PT_STRING>   # plain text string
122
123The remaining terminal symbols are themselves defined by the following
124set of grammar productions (regular sub-grammars for character
125sequences given as Perl-style regular expressions "/I<regex>/"):
126
127B<SEP>        ::= /;/
128
129B<OPEN>       ::= /{/
130
131B<CLOSE>      ::= /}/
132
133B<DQ_STRING>  ::= /"/ B<DQ_CHARS> /"/
134
135B<DQ_CHARS>   ::= I<empty>
136             | B<DQ_CHAR> B<DQ_CHARS>
137
138B<DQ_CHAR>    ::= /\\"/               # escaped quote
139             | /\\x\{[0-9a-fA-F]+\}/ # hex-char group
140             | /\\x[0-9a-fA-F]{2}/ # hex-char
141             | /\\[0-7]{1,3}/      # octal character
142             | /\\[nrtbfae]/       # special character
143             | /\\\n[ \t]*/        # line continuation
144             | /\\\\/              # escaped escape
145             | /./                 # any other char
146
147B<SQ_STRING>  ::= /'/ B<SQ_CHARS> /'/
148
149B<SQ_CHARS>   ::= I<empty>
150             | B<SQ_CHAR> B<SQ_CHARS>
151
152B<SQ_CHAR>    ::= /\\'/               # escaped quote
153             | /\\\n[ \t]*/        # line continuation
154             | /\\\\/              # escaped escape
155             | /./                 # any other char
156
157B<FQ_STRING>  ::= /q/ B<FQ_OPEN> B<FQ_CHARS> B<FQ_CLOSE>
158
159B<FQ_CHARS>   ::= I<empty>
160             | B<FQ_CHAR> B<FQ_CHARS>
161
162B<FQ_CHAR>    ::= /\\/ B<FQ_OPEN>        # escaped open
163             | /\\/ B<FQ_CLOSE>       # escaped close
164             | /\\\n[ \t]*/        # line continuation
165             | /./                 # any other char
166
167B<FQ_OPEN>    ::= /[!"#$%&'()*+,-./:;<=>?@\[\\\]^_`{|}~]/
168
169B<FQ_CLOSE>   ::= E<lt>E<lt> B<FQ_OPEN> or corresponding closing char
170                  ('}])>') if B<FQ_OPEN> is a char of '{[(<' E<gt>E<gt>
171
172B<PT_STRING>  ::= B<PT_CHAR> B<PT_CHARS>
173
174B<PT_CHARS>   ::= I<empty>
175             | B<PT_CHAR> B<PT_CHARS>
176
177B<PT_CHAR>    ::= /[^ \t\n;{}"']/     # none of specials
178
179Additionally, white-space B<WS> and comment B<CO> tokens are allowed at
180any position in the above productions of the previous grammar part.
181
182B<WS>         ::= /[ \t\n]+/
183
184B<CO>         ::= B<CO_C>                # style of C
185             | B<CO_CXX>              # style of C++
186             | B<CO_SH>               # style of /bin/sh
187
188B<CO_C>       ::= /\/\*([^*]|\*(?!\/))*\*\//
189
190B<CO_CXX>     ::= /\/\/[^\n]*/
191
192B<CO_SH>      ::= /#[^\n]*/
193
194Finally, any configuration line can have a trailing backslash character
195(C<\>) just before the newline character for simple line continuation.
196The backslash, the newline and (optionally) the leading whitespace on
197the following line are silently absorbed and, as a side effect, continue
198the first line with the contents of the second line.
199
200=head2 CONFIGURATION EXAMPLE
201
202A more intuitive description of the configuration syntax is perhaps given by
203the following example which shows all features at once:
204
205 /* single word */
206 foo;
207
208 /* multi word */
209 foo bar quux;
210
211 /* nested structure */
212 foo { bar; baz } quux;
213
214 /* quoted strings */
215 'foo bar'
216 "foo\x0a\t\n\
217  bar"
218
219=head1 APPLICATION PROGRAMMING INTERFACE (API)
220
221...
222
223=head1 NODE SELECTION SPECIFICATION
224
225The B<cfg_node_select> function takes a I<node selection specification>
226string B<select> for locating the intended nodes. This specification is
227defined as:
228
229B<select>           ::= I<empty>
230                   | B<select-step> B<select>
231
232B<select-step>      ::= B<select-direction>
233                     B<select-pattern>
234                     B<select-filter>
235
236B<select-direction> ::= "./"        # current node
237                   | "../"       # parent node
238                   | "..../"     # ancestor nodes
239                   | "-/"        # previous sibling node
240                   | "--/"       # preceding sibling nodes
241                   | "+/"        # next sibling node
242                   | "++/"       # following sibling nodes
243                   | "/"         # child nodes
244                   | "//"        # descendant nodes
245
246B<select-pattern>   ::= /</ B<regex> />/
247                   | B<token>
248
249B<select-filter>    ::= I<empty>
250                   | /\[/ B<filter-range> /\]/
251
252B<filter-range>     ::= B<num>           # short for: num,num
253                   | B<num> /,/          # short for: num,-1
254                   | /,/ B<num>          # short for: 1,num
255                   | B<num> /,/ B<num>
256
257B<num>              ::= /^[+-]?[0-9]+/
258
259B<regex>            ::= E<lt>E<lt> Regular Expression (PCRE-based) E<gt>E<gt>
260
261B<token>            ::= E<lt>E<lt> Plain-Text Token String E<gt>E<gt>
262
263=head1 IMPLEMENTATION ISSUES
264
265Goal: non-hardcoded syntax tokens, only hard-coded syntax structure
266Goal: time-efficient parsing
267Goal: space-efficient storage
268Goal: representation of configuration as AST
269Goal: manipulation (annotation, etc) of AST via API
270Goal: dynamic syntax verification
271
272=head1 HISTORY
273
274B<OSSP cfg> was implemented in lots of small steps over a very
275long time. The first ideas date back to the year 1995 when
276Ralf S. Engelschall attended his first compiler construction lessons at
277university. But it was first completed by him in summer 2002 for use
278in the B<OSSP> project.
279
280=head1 AUTHOR
281
282 Ralf S. Engelschall
283 rse@engelschall.com
284 www.engelschall.com
285
286=cut
287
288