/*- * Copyright (c) 2003-2004 Andrey Simonenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "config.h" #ifndef lint static const char rcsid[] ATTR_UNUSED = "@(#)$Id: parser.c,v 1.4 2011/01/23 18:42:35 simon Exp $"; #endif /* !lint */ /* * I considered using of yacc/lex for implementing the parser for * ipa.conf(5), but have found that there will not be much advantages * in using such tools. At least: ipa.conf(5) accepts macro variables * and a module is allowed to add macro variables at any time, when it * parses a configuration stream, so we have to get one-by-one logical * line from the file and then parse it or give it to a module for parsing. 
* If a module considers that yacc/lex or similar tools are better, then * it can mark all its arguments values as IPA_CONF_TYPE_MISC and take * full control on parsing any argument. */ #include #include #include #include #include #include #include #include #include #include "ipa_mod.h" #include "queue.h" #include "memfunc.h" #include "parser.h" /* * Set DEBUG_PARSER to non-zero value to output debug * information about parsing to stderr. */ #ifndef DEBUG_PARSER # define DEBUG_PARSER 0 #endif ipa_mem_type *m_parser; /* Memory allocated by parser. */ char *parser_str_buf = NULL; /* -> buffer in parser_stringify(). */ void (*parser_vlogmsgx)(const char *, va_list) = NULL; unsigned int parser_token_id; /* Token ID. */ char *parser_token; /* Token name or NULL (section end). */ char *parser_args; /* Arguments or NULL (no arguments). */ size_t parser_args_len; /* strlen() of arguments. */ int parser_nargs; /* Number of arguments. */ #define PB_SIZE 4096 /* Initial size of buffer. */ #define PB_SIZE_MAX (4096 * 1024) /* Maximum size of buffer. */ /* Stack of pbs. */ static SLIST_HEAD(EMPTY, parser_pb) pb_stack; /* Pointer to current char read by parser_get_curhcar(). */ static unsigned char *curchar; /* * One symbol (macro variable). */ struct psym { TAILQ_ENTRY(psym) link; /* List of symbols. */ char *sym; /* Symbol's name. */ char *val; /* Symbol's value. */ size_t val_len; /* Length of val. */ char copy_flag; /* If set then copy sym and val.*/ }; TAILQ_HEAD(psym_list, psym); /* Head of list of symbols. */ /* Local and global symbols lists. */ static struct psym_list local_sym_list; static struct psym_list global_sym_list; static int section_cnt; /* +1 when '{' and -1 when '}'. */ static unsigned int quotes_cnt; /* Number of '\"' in the argument. */ /* Set if we are in a string. */ static char in_string_flag; /* Set if there was a space character in arguments. */ static char was_space_flag; /* Set if we are in symbol definition. 
*/
static char	sym_def_flag;

/*
 * printf-like log helpers defined later in this file; they are declared
 * with ATTR_FORMAT so that format strings and arguments can be checked.
 */
static void	logmsgx(const char *, ...) ATTR_FORMAT(printf, 1, 2);
static void	syntax_logmsgx(const char *, ...) ATTR_FORMAT(printf, 1, 2);
static void	error_logmsgx(const char *, ...) ATTR_FORMAT(printf, 1, 2);

#if DEBUG_PARSER
static void	debuglog(const char *, ...) ATTR_FORMAT(printf, 1, 2);
#endif

/*
 * Character classes.  The class of a character (see whatis_char) selects
 * the row of stac_tbl, so the order here must match the order of the rows
 * in stac_tbl and of the names in ch_msg.
 */
enum {
	CH_NEWLINE = 0,		/*  0 | '\n'		*/
	CH_SEMICOLON,		/*  1 | ';'		*/
	CH_SPACE,		/*  2 | ' ' or '\t'	*/
	CH_QUOTE,		/*  3 | '\"'		*/
	CH_OPEN_BRACE,		/*  4 | '{'		*/
	CH_CLOSE_BRACE,		/*  5 | '}'		*/
	CH_EQUAL,		/*  6 | '='		*/
	CH_POUND,		/*  7 | '#'		*/
	CH_DOLLAR,		/*  8 | '$'		*/
	CH_COLON,		/*  9 | ':'		*/
	CH_UNDERSCORE,		/* 10 | '_'		*/
	CH_BACKSLASH,		/* 11 | '\\'		*/
	CH_SLASH,		/* 12 | '/'		*/
	CH_ALPHA,		/* 13 | letter		*/
	CH_DIGIT,		/* 14 | digit		*/
	CH_PUNCT,		/* 15 | punctuation	*/
	CH_OTHER		/* 16 | other		*/
};

#if DEBUG_PARSER
/* Names of CH_xxx classes for debug output, indexed by CH_xxx. */
static const char *const ch_msg[] = {
	"NEWLINE",	"SEMICOLON",	"SPACE",	"QUOTE",
	"OPEN_BRACE",	"CLOSE_BRACE",	"EQUAL",	"POUND",
	"DOLLAR",	"COLON",	"UNDERSCORE",	"BACKSLASH",
	"SLASH",	"ALPHA",	"DIGIT",	"PUNCT",
	"OTHER"
};
#endif

#define PARSER_AC_T unsigned char /* Type for AC_xxx values. */
#define PARSER_ST_T unsigned char /* Type for ST_xxx and ER_xxx values. */

/*
 * Parser's states.  The current state selects the column of stac_tbl.
 */
enum {
	ST_INI = 0,	/* 0 | Initial state.				    */
	ST_TOK,		/* 1 | Characters go to the token.		    */
	ST_ARG,		/* 2 | Characters go to the argument.		    */
	ST_STR,		/* 3 | Characters go to a string in the argument.   */
	ST_AEX		/* 4 | An argument is expected, wait for non space. */
};

#define ST_NO 5	/* Number of states. */

static PARSER_ST_T curstate = ST_INI;	/* Current state. */

#if DEBUG_PARSER
/* Names of ST_xxx states for debug output, indexed by ST_xxx. */
static const char *const st_msg[] = {
	"INI",	"TOK",	"ARG",	"STR",	"AEX"
};
#endif

/*
 * Parser's actions.  An AC_xxx value is an index into ac_func_tbl,
 * so the order here must match the order of handlers in that table.
 */
enum {
	AC_SKP = 0,	/*  0 | Skip a character, continue parsing.	*/
	AC_AAR,		/*  1 | Add a character to the argument.	*/
	AC_ATK,		/*  2 | Add a character to the token.		*/
	AC_AQU,		/*  3 | Add '\"' to the argument.		*/
	AC_TOK,		/*  4 | Definition of the token is complete.	*/
	AC_PAR,		/*  5 | Definition of the parameter is complete.*/
	AC_BOS,		/*  6 | Begin of the section.			*/
	AC_EOS,		/*  7 | End of the section.			*/
	AC_PND,		/*  8 | '#' appeared.				*/
	AC_FMT,		/*  9 | '\\' appeared in a string.		*/
	AC_ASP,		/* 10 | Space in the argument.			*/
	AC_SLH,		/* 11 | '/' appeared.				*/
	AC_DLR,		/* 12 | '$' appeared.				*/
	AC_ERR		/* 13 | A syntax error occurred.		*/
};

#define AC_NO 14	/* Number of actions. */

#if DEBUG_PARSER
/* Names of AC_xxx actions for debug output, indexed by AC_xxx. */
static const char *const ac_msg[] = {
	"SKP",	"AAR",	"ATK",	"AQU",	"TOK",	"PAR",	"BOS",
	"EOS",	"PND",	"FMT",	"ASP",	"SLH",	"DLR",	"ERR"
};
#endif

/*
 * Parser's errors.  An ER_xxx value is an index into error_msg
 * (see syntax_error()).
 */
enum {
	ER_UXP,		/* 0 | Unexpected character.			    */
	ER_NSN,		/* 1 | No section name.				    */
	ER_NLV,		/* 2 | No lvalue (section/parameter name is absent). */
	ER_FMT		/* 3 | Wrong character after '\\' in a string.	    */
};

#if DEBUG_PARSER
/* Names of ER_xxx errors for debug output, indexed by ER_xxx. */
static const char *const er_msg[] = {
	"UXP",	"NSN",	"NLV",	"FMT"
};
#endif

/* Human readable messages for ER_xxx errors, indexed by ER_xxx. */
static const char *const error_msg[] = {
	/* 0 */ "unexpected character",
	/* 1 */ "no section name",
	/* 2 */ "no lvalue (section or parameter name is absent)",
	/* 3 */ "wrong format character after '\\' in a string"
};

/*
 * One cell of the state/action table: what to do for a character class
 * in a given state and which state to enter afterwards.
 */
struct stac {
	PARSER_AC_T	action;	/* AC_xxx */
	PARSER_ST_T	state;	/* ST_xxx or ER_xxx if action == AC_ERR. */
} ATTR_PACKED;

static const struct stac *curstac;	/* Current entry in stac_tbl.
*/

/*
 * State/action table.  parser_read_string() looks up
 * stac_tbl[class of current character][current state]; rows follow the
 * order of the CH_xxx constants and columns the order of the ST_xxx
 * constants.  For AC_ERR cells the second field is an ER_xxx code.
 */
static const struct stac stac_tbl[][ST_NO] = {
	   /*	  ST_INI	   ST_TOK	    ST_ARG	     ST_STR	      ST_AEX	 */
/* \n  */ { {AC_SKP,ST_INI}, {AC_TOK,ST_AEX}, {AC_ASP,ST_ARG}, {AC_AAR,ST_STR}, {AC_SKP,ST_AEX} },
/* ;   */ { {AC_ERR,ER_UXP}, {AC_PAR,ST_INI}, {AC_PAR,ST_INI}, {AC_AAR,ST_STR}, {AC_PAR,ST_INI} },
/* sp  */ { {AC_SKP,ST_INI}, {AC_TOK,ST_AEX}, {AC_ASP,ST_ARG}, {AC_AAR,ST_STR}, {AC_SKP,ST_AEX} },
/* \"  */ { {AC_ERR,ER_UXP}, {AC_ERR,ER_UXP}, {AC_AQU,ST_STR}, {AC_AQU,ST_ARG}, {AC_AQU,ST_STR} },
/* {   */ { {AC_ERR,ER_NSN}, {AC_BOS,ST_INI}, {AC_BOS,ST_INI}, {AC_AAR,ST_STR}, {AC_BOS,ST_INI} },
/* }   */ { {AC_EOS,ST_INI}, {AC_ERR,ER_UXP}, {AC_ERR,ER_UXP}, {AC_AAR,ST_STR}, {AC_ERR,ER_UXP} },
/* =   */ { {AC_ERR,ER_NLV}, {AC_TOK,ST_ARG}, {AC_AAR,ST_ARG}, {AC_AAR,ST_STR}, {AC_SKP,ST_ARG} },
/* #   */ { {AC_PND,ST_INI}, {AC_PND,ST_ARG}, {AC_PND,ST_ARG}, {AC_AAR,ST_STR}, {AC_PND,ST_AEX} },
/* $   */ { {AC_DLR,ST_AEX}, {AC_DLR,ST_TOK}, {AC_DLR,ST_ARG}, {AC_DLR,ST_STR}, {AC_DLR,ST_AEX} },
/* :   */ { {AC_ATK,ST_TOK}, {AC_ATK,ST_TOK}, {AC_AAR,ST_ARG}, {AC_AAR,ST_STR}, {AC_AAR,ST_ARG} },
/* _   */ { {AC_ATK,ST_TOK}, {AC_ATK,ST_TOK}, {AC_AAR,ST_ARG}, {AC_AAR,ST_STR}, {AC_AAR,ST_ARG} },
/* \\  */ { {AC_ERR,ER_UXP}, {AC_ERR,ER_UXP}, {AC_AAR,ST_ARG}, {AC_FMT,ST_STR}, {AC_AAR,ST_ARG} },
/* /   */ { {AC_SLH,ST_INI}, {AC_SLH,ST_TOK}, {AC_SLH,ST_ARG}, {AC_AAR,ST_STR}, {AC_SLH,ST_AEX} },
/*alpha*/ { {AC_ATK,ST_TOK}, {AC_ATK,ST_TOK}, {AC_AAR,ST_ARG}, {AC_AAR,ST_STR}, {AC_AAR,ST_ARG} },
/*digit*/ { {AC_ERR,ER_UXP}, {AC_ATK,ST_TOK}, {AC_AAR,ST_ARG}, {AC_AAR,ST_STR}, {AC_AAR,ST_ARG} },
/*punct*/ { {AC_ERR,ER_UXP}, {AC_ERR,ER_UXP}, {AC_AAR,ST_ARG}, {AC_AAR,ST_STR}, {AC_AAR,ST_ARG} },
/*other*/ { {AC_ERR,ER_UXP}, {AC_ERR,ER_UXP}, {AC_ERR,ER_UXP}, {AC_AAR,ST_STR}, {AC_ERR,ER_UXP} }
};

/*
 * Characters --> classes table, indexed by unsigned character value;
 * it is allocated and filled in parser_init().
 */
static unsigned char *whatis_char;	/* Characters --> classes table. */

/*
 * This generic configuration buffer structure is used for adding
 * characters to token, argument and symbol name.
 */
struct conf_buf {
	char		*buf;	/* Buffer. */
	size_t		size;	/* Its size. */
	size_t		off;	/* Current offset. */
};

static struct conf_buf token_buf;	/* Buffer for a token. */
static struct conf_buf args_buf;	/* Buffer for arguments. */
static struct conf_buf sym_buf;		/* Buffer for a symbol name. */

/* parser_add_char() appends to the buffer this pointer refers to. */
static struct conf_buf *cur_buf;	/* Pointer to the current buffer. */

/*
 * Return codes from parser_get_curchar() and ac_xxx().
 * "Update state" means that parser_read_string() assigns the state
 * from the current stac_tbl cell to curstate.
 */
enum {
	AC_RET_CONT = 0,	/* 0 | Update state, continue parsing.	  */
	AC_RET_LINE,		/* 1 | Update state, return logical line. */
	AC_RET_CONTX,		/* 2 | Continue parsing.		  */
	AC_RET_ERR,		/* 3 | An error occurred.		  */
	AC_RET_EOF		/* 4 | EOF.				  */
};

/* Action handlers, one per AC_xxx action. */
static int	ac_skp(void), ac_aar(void), ac_atk(void), ac_aqu(void);
static int	ac_tok(void), ac_par(void), ac_bos(void), ac_eos(void);
static int	ac_pnd(void), ac_fmt(void), ac_asp(void), ac_slh(void);
static int	ac_dlr(void), ac_err(void);

/* Dispatch table: handler for each action, indexed by AC_xxx. */
static int (* const ac_func_tbl[AC_NO])(void) = {
	ac_skp,		/* AC_SKP */
	ac_aar,		/* AC_AAR */
	ac_atk,		/* AC_ATK */
	ac_aqu,		/* AC_AQU */
	ac_tok,		/* AC_TOK */
	ac_par,		/* AC_PAR */
	ac_bos,		/* AC_BOS */
	ac_eos,		/* AC_EOS */
	ac_pnd,		/* AC_PND */
	ac_fmt,		/* AC_FMT */
	ac_asp,		/* AC_ASP */
	ac_slh,		/* AC_SLH */
	ac_dlr,		/* AC_DLR */
	ac_err		/* AC_ERR */
};

/*
 * The wrapper for parser_vlogmsgx, its declaration above will check
 * format string and arguments.  parser_vlogmsgx is called without a NULL
 * check here; parser_init() rejects a NULL callback.
 */
static void
logmsgx(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	parser_vlogmsgx(format, ap);
	va_end(ap);
}

#if DEBUG_PARSER
/*
 * printf-like function, which outputs debug messages to stderr.
 */
static void
debuglog(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	vfprintf(stderr, format, ap);
	va_end(ap);
}
#endif

/*
 * Output error message header.
*/ static void error_header_log(const char *what) { const struct parser_pb *pb; pb = SLIST_FIRST(&pb_stack); if (pb != NULL) { char buf[5]; if (isprint(*curchar)) { buf[0] = *curchar; buf[1] = '\0'; } else switch (*curchar) { case '\n': strncpy(buf, "\\n", sizeof(buf)); break; case '\t': strncpy(buf, "\\t", sizeof(buf)); break; default: snprintf(buf, sizeof(buf), "0x%02x", *curchar); } if (pb->fp != NULL) logmsgx("%serror occurred near line %u at character " "'%s' in %s", what, pb->lineno, buf, pb->fname); else logmsgx("%serror occurred near line %u at character " "'%s' in %s (in macro variable ${%s} value)", what, pb->lineno, buf, pb->fname, pb->sname); } else logmsgx("%serror occurred near last line in main " "configuration file", what); } /* * Report about syntax error. */ static void syntax_logmsgx(const char *format, ...) { va_list ap; error_header_log("syntax "); va_start(ap, format); parser_vlogmsgx(format, ap); va_end(ap); } /* * Report about syntax error given by the code. */ static void syntax_error(PARSER_ST_T error) { syntax_logmsgx("%s", error_msg[error]); } /* * Report about non-syntax error. */ static void error_logmsgx(const char *format, ...) { va_list ap; error_header_log(""); va_start(ap, format); parser_vlogmsgx(format, ap); va_end(ap); } /* * Return a pointer to psym for the symbol with name sym * in the given psym_list. */ static struct psym * find_sym(const struct psym_list *list, const char *sym) { struct psym *psym; TAILQ_FOREACH(psym, list, link) if (strcmp(psym->sym, sym) == 0) return (psym); return (NULL); } /* * Release memory used by the given psym_list. 
*/ static void free_sym_list(struct psym_list *list) { struct psym *psym, *psym_next; TAILQ_FOREACH_SAFE(psym, list, link, psym_next) { if (psym->copy_flag) { mem_free(psym->sym, m_parser); mem_free(psym->val, m_parser); } mem_free(psym, m_parser); } TAILQ_INIT(list); } /* * Add a new symbol with name sym and with value val to list, if * the list already has this symbol, then replace it with a new value, * honoring the copy_flag value. */ static int sym_add(struct psym_list *list, char *sym, char *val, int copy_flag) { struct psym *psym; char old_copy_flag; psym = find_sym(list, sym); if (psym != NULL) { /* Replace symbol. */ old_copy_flag = psym->copy_flag; if (old_copy_flag) { if (!copy_flag) mem_free(psym->sym, m_parser); mem_free(psym->val, m_parser); } } else { /* New symbol. */ old_copy_flag = 0; psym = mem_malloc(sizeof(*psym), m_parser); if (psym == NULL) { logmsgx("sym_add: mem_malloc failed"); return (-1); } TAILQ_INSERT_TAIL(list, psym, link); } if (copy_flag) { if (!old_copy_flag) { psym->sym = mem_strdup(sym, m_parser); if (psym->sym == NULL) { logmsgx("sym_add: mem_strdup failed"); mem_free(psym, m_parser); return (-1); } } psym->val = mem_strdup(val, m_parser); if (psym->val == NULL) { logmsgx("sym_add: mem_strdup failed"); mem_free(psym->sym, m_parser); mem_free(psym, m_parser); return (-1); } } else { psym->sym = sym; psym->val = val; } psym->val_len = strlen(val); psym->copy_flag = copy_flag; return (0); } /* * Remove a symbol sym from the list, return -1 if the list * does not have a symbol with the given name. */ static int sym_del(struct psym_list *list, const char *sym) { struct psym *psym; psym = find_sym(list, sym); if (psym != NULL) { TAILQ_REMOVE(list, psym, link); if (psym->copy_flag) { mem_free(psym->sym, m_parser); mem_free(psym->val, m_parser); } mem_free(psym, m_parser); } else return (-1); return (0); } /* * Delete a local symbol. 
*/ int parser_local_sym_del(const char *sym) { return (sym_del(&local_sym_list, sym)); } /* * Add a local symbol. */ int parser_local_sym_add(char *sym, char *val, int copy_flag) { return (sym_add(&local_sym_list, sym, val, copy_flag)); } /* * Delete a global symbol. */ int parser_global_sym_del(const char *sym) { return (sym_del(&global_sym_list, sym)); } /* * Add a global symbol. */ int parser_global_sym_add(char *sym, char *val, int copy_flag) { return (sym_add(&global_sym_list, sym, val, copy_flag)); } /* * Create a new pb. If size is zero, then allocate pb->buf, * else do not allocate pb->buf, it should be allocated somewhere * outside of this function. */ struct parser_pb * parser_new_pb(size_t size) { struct parser_pb *pb; pb = mem_malloc(sizeof(*pb), m_parser); if (pb == NULL) { logmsgx("parser_new_pb: mem_malloc failed"); return (NULL); } if (size == 0) { pb->buf_size = PB_SIZE; pb->buf = mem_malloc(PB_SIZE, m_parser); if (pb->buf == NULL) { logmsgx("parser_new_pb: mem_malloc failed"); mem_free(pb, m_parser); return (NULL); } } else { pb->buf_size = size; pb->buf = NULL; } pb->buf_nread = pb->buf_off = 0; pb->fname = NULL; pb->sname = NULL; pb->fp = NULL; pb->lineno = 1; return (pb); } /* * Push pb to the top of pb_stack and set curchar pointer. If the * given pb is a macro variable's value, then check for loops. 
*/ int parser_push_pb(struct parser_pb *pb1) { const struct parser_pb *pb; struct parser_pb *pb2; pb = SLIST_FIRST(&pb_stack); #if DEBUG_PARSER if (pb != NULL) { debuglog("CURPBUF: "); debuglog("file %s", pb->fname); if (pb->sname != NULL) debuglog(" (sym ${%s} mem <%s>)", pb->sname, pb->buf); debuglog(" size %lu, off %lu, nread %lu\n", (unsigned long)pb->buf_size, (unsigned long)pb->buf_off, (unsigned long)pb->buf_nread); } debuglog("PBUF PUSH: "); if (pb1->fname != NULL) debuglog("file %s", pb1->fname); else debuglog("sym ${%s} mem <%s>", pb1->sname, pb1->buf); debuglog(" size %lu, off %lu, nread %lu\n", (unsigned long)pb1->buf_size, (unsigned long)pb1->buf_off, (unsigned long)pb1->buf_nread); #endif /* DEBUG_PARSER */ /* Check for loopbacks in macro variable expanding. */ if (pb1->sname != NULL) SLIST_FOREACH(pb2, &pb_stack, link) if (pb2->sname != NULL) if (strcmp(pb2->sname, pb1->sname) == 0) { error_logmsgx("parser_push_pb: loop" "back is detected while expanding " "${%s} macro variable", pb1->sname); return (-1); } /* Link just created new pb. */ SLIST_INSERT_HEAD(&pb_stack, pb1, link); if (pb1->fname == NULL) { /* This is needed only for log functions. */ pb1->fname = pb->fname; pb1->lineno = pb->lineno; } /* ...-1 for parser_get_curchar(). */ curchar = pb1->buf - 1; return (0); } /* * Pop pb from the top of pb_stack and restore curchar pointer. */ struct parser_pb * parser_pop_pb(void) { struct parser_pb *pb; /* Unlink pb. */ pb = SLIST_FIRST(&pb_stack); SLIST_REMOVE_HEAD(&pb_stack, link); /* For macro variables buffer is shared with macro variable's value. */ if (pb->fp != NULL) mem_free(pb->buf, m_parser); /* Release memory used by pb structure. */ mem_free(pb, m_parser); /* Get new pb and set curchar, ...-1 for parser_get_curchar(). 
*/ pb = SLIST_FIRST(&pb_stack); if (pb != NULL) curchar = pb->buf + pb->buf_off - 1; #if DEBUG_PARSER if (pb == NULL) debuglog("PBUF POP: NULL\n"); else { debuglog("PBUF POP: "); debuglog("file %s", pb->fname); if (pb->sname != NULL) debuglog(" (sym ${%s} mem <%s>)", pb->sname, pb->buf); debuglog(" %lu bytes, off %lu, nread %lu\n", (unsigned long)pb->buf_size, (unsigned long)pb->buf_off, (unsigned long)pb->buf_nread); } #endif return (pb); } /* * Return pointer to top pb. */ struct parser_pb * parser_top_pb(void) { return (SLIST_FIRST(&pb_stack)); } /* * Initialize whatis_char table (only once), {token,args,sym}_buf, * symbol tables and pb_stack. This function can be called, * even if parser_read_string() returned an error. */ int parser_init(void) { static char called_before = 0; if (parser_vlogmsgx == NULL) { /* Cannot even log about problem. */ return (-1); } /* * Initialize whatis_char only once. Cannot use static table, * because we should honor locale (isxxxx() functions). */ if (!called_before) { unsigned int ch; called_before = 1; whatis_char = malloc(2 << (CHAR_BIT - 1)); if (whatis_char == NULL) { logmsgx("parser_init: malloc: %s", strerror(errno)); return (-1); } for (ch = 0; ch < (2 << (CHAR_BIT - 1)); ++ch) if (isalpha(ch)) whatis_char[ch] = CH_ALPHA; else if (ispunct(ch)) whatis_char[ch] = CH_PUNCT; else if (isdigit(ch)) whatis_char[ch] = CH_DIGIT; else whatis_char[ch] = CH_OTHER; whatis_char['\t'] = CH_SPACE; whatis_char['\n'] = CH_NEWLINE; whatis_char[' '] = CH_SPACE; whatis_char['\"'] = CH_QUOTE; whatis_char['#'] = CH_POUND; whatis_char['$'] = CH_DOLLAR; whatis_char['/'] = CH_SLASH; whatis_char[':'] = CH_COLON; whatis_char[';'] = CH_SEMICOLON; whatis_char['='] = CH_EQUAL; whatis_char['\\'] = CH_BACKSLASH; whatis_char['_'] = CH_UNDERSCORE; whatis_char['{'] = CH_OPEN_BRACE; whatis_char['}'] = CH_CLOSE_BRACE; } /* Initialize token_buf, args_buf and sym_buf. 
*/ token_buf.buf = args_buf.buf = sym_buf.buf = NULL; token_buf.size = args_buf.size = sym_buf.size = 0; /* Initialize symbol tables. */ TAILQ_INIT(&global_sym_list); TAILQ_INIT(&local_sym_list); /* Initialize pb_stack. */ SLIST_INIT(&pb_stack); section_cnt = 0; in_string_flag = was_space_flag = sym_def_flag = 0; return (0); } /* * Do last check of syntax and release all memory except whatis_char * table allocated in parser_init(). */ int parser_deinit(void) { /* Last check of syntax. */ if (section_cnt) { syntax_logmsgx("section is not closed"); return (-1); } /* Release internal memory. */ mem_free(token_buf.buf, m_parser); mem_free(args_buf.buf, m_parser); mem_free(sym_buf.buf, m_parser); /* * Deinitialize global symbol table, local one automatically * was deinitialized. */ free_sym_list(&global_sym_list); return (0); } /* * Add the given character to the current buffer, if there is not * enough space in the buffer, then realloc() it. */ static int parser_add_char(unsigned char ch) { char *buf; if (cur_buf->off == cur_buf->size) { if (cur_buf->size == 0) cur_buf->size = PB_SIZE; else { cur_buf->size *= 2; if (cur_buf->size > PB_SIZE_MAX) { error_logmsgx("parser_add_char: " "too big buffer size %llu", (unsigned long long)cur_buf->size); return (-1); } } buf = mem_realloc(cur_buf->buf, cur_buf->size, m_parser); if (buf == NULL) { error_logmsgx("parser_add_char: mem_realloc failed"); return (-1); } cur_buf->buf = buf; } cur_buf->buf[cur_buf->off++] = ch; return (0); } /* * Get next curchar from the configuration stream. * Return: * AC_RET_CONT, if curchar was successfully read; * AC_RET_ERR, if an error occurred; * AC_RET_EOF, if EOF occurred. */ static int parser_get_curchar(void) { struct parser_pb *pb; pb = SLIST_FIRST(&pb_stack); for (;;) { if (pb->buf_off < pb->buf_nread) { /* Get curchar from the buffer. */ curchar++; if (*curchar == '\n' && pb->fp != NULL) pb->lineno++; pb->buf_off++; return (AC_RET_CONT); } if (pb->fp != NULL) { /* Pb content from the file. 
*/ pb->buf_nread = fread(pb->buf, sizeof(char), pb->buf_size, pb->fp); if (pb->buf_nread == 0) { if (feof(pb->fp) != 0) return (AC_RET_EOF); error_logmsgx("parser_get_curchar: fread(%s): " "%s", pb->fname, strerror(errno)); return (AC_RET_ERR); } pb->buf_off = 0; curchar = pb->buf - 1; } else { /* This pb ended, use previous one. */ pb = parser_pop_pb(); } } /* NOTREACHED */ } /* * Put curchar back to the buffer, this function must be called * no more than one time after parser_get_curchar(), because it * cannot restore pb_stack. */ static int parser_put_curchar(void) { struct parser_pb *pb; pb = SLIST_FIRST(&pb_stack); if (pb->buf_off == 0) { error_logmsgx("internal error: parser_put_curchar: " "buf_off == 0"); return (-1); } /* Put curchar to the buffer. */ if (*curchar == '\n' && pb->fp != NULL) pb->lineno--; curchar--; pb->buf_off--; return (0); } #if DEBUG_PARSER /* * Output current character, current state and new state and action. */ static void print_curchar_and_stac(void) { switch (*curchar) { case '\n': debuglog("\\n"); break; case '\t': debuglog("\\t"); break; case ' ': debuglog("sp"); break; default: if (isprint(*curchar)) debuglog(" %c", *curchar); else debuglog("%02x", *curchar); } debuglog(" : %s -> %s : %s (%s)\n", st_msg[curstate], curstac->action != AC_ERR ? st_msg[curstac->state] : er_msg[curstac->state], ac_msg[curstac->action], ch_msg[whatis_char[*curchar]]); } /* * Output current token and argument. 
*/ static void print_token_and_args(void) { switch (parser_token_id) { case TOKEN_ID_SECTION_BEGIN: debuglog("SECTION_BEGIN: TOKEN <%s>", parser_token); break; case TOKEN_ID_SECTION_END: debuglog("SECTION_END"); break; case TOKEN_ID_PARAMETER: debuglog("PARAMETER: TOKEN <%s>", parser_token); break; default: debuglog("Unknown token ID %u", parser_token_id); } if (parser_args != NULL) debuglog(", ARGS <%s>", parser_args); debuglog("\n"); } #endif /* DEBUG_PARSER */ static void init_read_string(void) { cur_buf = &token_buf; parser_args = NULL; parser_nargs = 0; token_buf.off = args_buf.off = 0; was_space_flag = sym_def_flag = 0; quotes_cnt = 0; } /* * Read one logical string from the configuration stream. * Macro variables definitions are not returned, instead they * are saved in local symbols tables. * Return: * 1, if one string was successfully read; * -1, if syntax or another error occurred; * 0, if EOF occurred. */ int parser_read_string(void) { init_read_string(); for (;;) { switch (parser_get_curchar()) { case AC_RET_CONT: curstac = &stac_tbl[whatis_char[*curchar]][curstate]; #if DEBUG_PARSER print_curchar_and_stac(); #endif switch (ac_func_tbl[curstac->action]()) { case AC_RET_CONT: curstate = curstac->state; break; case AC_RET_LINE: #if DEBUG_PARSER print_token_and_args(); #endif curstate = curstac->state; return (1); case AC_RET_CONTX: break; case AC_RET_ERR: return (-1); case AC_RET_EOF: goto end_of_file; } break; case AC_RET_EOF: goto end_of_file; case AC_RET_ERR: return (-1); } } end_of_file: if (curstate != ST_INI) { if (in_string_flag) syntax_logmsgx("string is not closed with '\\\"' " "and end of file occurred"); else syntax_logmsgx("unexpected end of file"); return (-1); } return (0); } /* * Skip current character, continue. */ static int ac_skp(void) { return (AC_RET_CONT); } /* * Add curchar to the arguments buffer. 
*/ static int ac_aar(void) { if (parser_add_char(*curchar) < 0) return (AC_RET_ERR); if (was_space_flag) { was_space_flag = 0; parser_nargs++; } return (AC_RET_CONT); } /* * Add curchar to the token buffer. */ static int ac_atk(void) { if (parser_add_char(*curchar) < 0) return (AC_RET_ERR); return (AC_RET_CONT); } /* * Add curchar '\"' to the arguments buffer, increase number * of double quotes and change in_string_flag. */ static int ac_aqu(void) { int rv; rv = ac_aar(); if (rv != AC_RET_CONT) return (rv); ++quotes_cnt; in_string_flag = !in_string_flag; return (AC_RET_CONT); } /* * Definition of the token is complete. Switch current conf_buf * to the arguments buffer. */ static int ac_tok(void) { if (parser_add_char('\0') < 0) return (AC_RET_ERR); cur_buf = &args_buf; return (AC_RET_CONT); } /* * If configuration buffers are not complete (there is not '\0' * character at the end for example), then this function must be called. * Set parser_token and parser_args pointers. */ static int complete_conf_bufs(void) { if (cur_buf == &token_buf) { if (parser_add_char('\0') < 0) return (-1); } else if (args_buf.off != 0) { char *ptr; ptr = args_buf.buf + args_buf.off - 1; switch (*ptr) { case ' ': case '\t': *ptr = '\0'; break; default: if (parser_add_char('\0') < 0) return (-1); } parser_args = args_buf.buf; parser_args_len = args_buf.off - 1; parser_nargs++; } parser_token = token_buf.buf; return (0); } /* * Definition of the parameter or macro variable is complete. */ static int ac_par(void) { if (complete_conf_bufs() < 0) return (AC_RET_ERR); if (sym_def_flag) { /* Definition of a macro variable. 
*/ char *val; if (!parser_arg_is_str()) { syntax_logmsgx("macro variable's value should be a " "string"); return (AC_RET_ERR); } val = parser_strdup(args_buf.buf, m_parser); if (val == NULL) { error_logmsgx("cannot allocate memory for value of " "${%s} macro variable", sym_buf.buf); return (AC_RET_ERR); } if (section_cnt == 0) { if (parser_global_sym_add(sym_buf.buf, val, 1) < 0) { error_logmsgx("cannot register " "a macro variable"); return (-1); } } else { if (parser_local_sym_add(sym_buf.buf, val, 1) < 0) { error_logmsgx("cannot register " "a macro variable"); return (-1); } } mem_free(val, m_parser); init_read_string(); curstate = ST_INI; return (AC_RET_CONTX); } parser_token_id = TOKEN_ID_PARAMETER; return (AC_RET_LINE); } /* * Begin of the section. */ static int ac_bos(void) { if (sym_def_flag) { /* Looks like incorrect definition of a macro variable. */ syntax_logmsgx("macro variable's value should be a " "string or wrong usage or macro variable definition"); return (AC_RET_ERR); } if (complete_conf_bufs() < 0) return (AC_RET_ERR); ++section_cnt; parser_token_id = TOKEN_ID_SECTION_BEGIN; return (AC_RET_LINE); } /* * End of the section. */ static int ac_eos(void) { if (section_cnt == 0) { syntax_logmsgx("unmatched closed curly brace"); return (AC_RET_ERR); } if (--section_cnt == 0) free_sym_list(&local_sym_list); parser_token_id = TOKEN_ID_SECTION_END; return (AC_RET_LINE); } /* * '#' appeared, read current line up to the '\n' or EOF. */ static int ac_pnd(void) { int rv; for (;;) { rv = parser_get_curchar(); if (rv != AC_RET_CONT) return (rv); if (*curchar == '\n') break; } return (AC_RET_CONT); } /* * '\\' appeared in a string, check if the next character is either * '\"', '\\', 't', 'n' or '\n'. If the next character is '\n', then * ignore '\\' and '\n'; if next character is 't' or 'n', then convert * this sequence to real character, else add "\\x" character sequence * to a string. 
*/ static int ac_fmt(void) { int rv; rv = parser_get_curchar(); if (rv != AC_RET_CONT) { if (rv == AC_RET_EOF) syntax_error(ER_FMT); return (rv); } switch (*curchar) { case '\"': case '\\': if (parser_add_char('\\') < 0 || parser_add_char(*curchar) < 0) return (AC_RET_ERR); break; case 'n': if (parser_add_char('\n') < 0) return (AC_RET_ERR); break; case 't': if (parser_add_char('\t') < 0) return (AC_RET_ERR); break; case '\n': break; default: syntax_error(ER_FMT); return (AC_RET_ERR); } return (AC_RET_CONT); } /* * Space in the argument. */ static int ac_asp(void) { if (!was_space_flag && args_buf.off != 0) { if (parser_add_char(' ') < 0) return (AC_RET_ERR); was_space_flag = 1; } return (AC_RET_CONT); } /* * '/' appeared. To not make stac_tbl too complex here, there are * some tricks in checking if '/' can appear in current state. */ static int ac_slh(void) { int rv; rv = parser_get_curchar(); if (rv != AC_RET_CONT) { if (rv == AC_RET_EOF) syntax_error(ER_UXP); return (rv); } if (*curchar != '*') { if (curstate == ST_INI || curstate == ST_TOK) { syntax_error(ER_UXP); return (AC_RET_ERR); } if (parser_add_char('/') < 0) return (AC_RET_ERR); if (parser_put_curchar() < 0) return (AC_RET_ERR); curstate = ST_ARG; return (AC_RET_CONTX); } else { int was_star; was_star = 0; for (;;) { rv = parser_get_curchar(); if (rv != AC_RET_CONT) { if (rv == AC_RET_EOF) { syntax_logmsgx("C-like comment is not " "closed"); return (AC_RET_ERR); } return (rv); } if (*curchar == '/') { if (was_star) break; } else was_star = *curchar == '*'; } return (AC_RET_CONT); } } /* * '$' appeared. To not make stac_tbl too complex here, we will decide * if it is the begin of a macro variable definition or we need to * expand a macro variable. 
*/ static int ac_dlr(void) { struct conf_buf *next_buf; int rv; if (sym_def_flag) { if (parser_add_char('$') < 0) return (AC_RET_ERR); return (AC_RET_CONT); } rv = parser_get_curchar(); if (rv != AC_RET_CONT) { if (rv == AC_RET_EOF) syntax_error(ER_UXP); return (rv); } if (*curchar != '{') { syntax_logmsgx("wrong usage of character '$': " "character '{' was missed"); return (AC_RET_ERR); } next_buf = cur_buf; /* Get name of a macro variable. */ cur_buf = &sym_buf; sym_buf.off = 0; for (;;) { rv = parser_get_curchar(); if (rv != AC_RET_CONT) return (rv); switch (whatis_char[*curchar]) { case CH_UNDERSCORE: case CH_ALPHA: case CH_DIGIT: case CH_DOLLAR: if (parser_add_char(*curchar) < 0) return (AC_RET_ERR); break; case CH_CLOSE_BRACE: if (parser_add_char('\0') < 0) return (AC_RET_ERR); goto got_sym_name; default: syntax_logmsgx("this character is not allowed in " "macro variable name"); return (AC_RET_ERR); } } got_sym_name: switch (sym_buf.off) { case 1: syntax_logmsgx("empty macro variable's name is not allowed"); return (AC_RET_ERR); case 2: if (sym_buf.buf[0] == '$') { /* ${$} */ if (curstate == ST_INI) { syntax_logmsgx("macro variable ${$} is " "reserved, cannot redefine it"); return (AC_RET_ERR); } /* * Expand ${$} right now to allow to insert * '$' character. */ cur_buf = &args_buf; if (parser_add_char('$') < 0) return (AC_RET_ERR); break; } /* FALLTHROUGH */ default: if (curstate == ST_INI) { /* Symbol definition. */ next_buf = &args_buf; sym_def_flag = 1; } else { /* Expand symbol. 
*/ struct parser_pb *pb; struct psym *psym; psym = find_sym(&local_sym_list, sym_buf.buf); if (psym == NULL) psym = find_sym(&global_sym_list, sym_buf.buf); if (psym == NULL) { syntax_logmsgx("unknown macro variable ${%s}", sym_buf.buf); return (AC_RET_ERR); } pb = parser_new_pb(psym->val_len); if (pb == NULL) { error_logmsgx("cannot expand macro variable " "${%s}", sym_buf.buf); return (AC_RET_ERR); } pb->buf = (unsigned char *)psym->val; pb->buf_nread = psym->val_len; pb->sname = psym->sym; if (parser_push_pb(pb) < 0) return (AC_RET_ERR); } } cur_buf = next_buf; return (AC_RET_CONT); } /* * A syntax error occurred. */ static int ac_err(void) { syntax_error(curstac->state); return (AC_RET_ERR); } /* * Return non-zero if the given buffer is a string. */ int parser_buf_is_str(const char *s) { if (*s != '\"') return (0); for (++s; *s != '\0'; ++s) switch (*s) { case '\\': /* "\\x" */ ++s; break; case '\"': /* Last '\"'. */ return (*(s + 1) == '\0'); } return (0); } /* * Return non-zero if whole parser_args is a string. * This is the optimized version, no linear search. */ int parser_arg_is_str(void) { return (parser_nargs == 1 && quotes_cnt == 2 && parser_args[0] == '\"' && parser_args[parser_args_len - 1] == '\"'); } /* * Allocate buffer and copy string from str converting '\\'-like * sequences to real characters (thanks to ac_fmt() there are only * two such sequences \" and \\). */ char * parser_strdup(char *str, void *mem_type) { const char *ptr; char *res, *mod; size_t len; int has_special; len = strlen(str) - 1; res = mem_malloc(len, mem_type); if (res == NULL) { error_logmsgx("parser_strdup: mem_malloc failed"); return (NULL); } has_special = 0; for (mod = res, ptr = str + 1, len = 0; *ptr != '\"'; ++len, ++mod, ++ptr) if (*ptr == '\\') { has_special = 1; /* \" or \\. 
*/ *mod = *++ptr; } else *mod = *ptr; *mod = '\0'; if (has_special) { mod = mem_realloc(res, ++len, mem_type); if (mod == NULL) error_logmsgx("parser_strdup: mem_realloc failed"); res = mod; } return (res); } /* * Allocate buffer and convert a string pointed by ptr to * this buffer expanding all escape sequences. */ char * parser_stringify(const char *ptr) { char *mod; /* Can free, because mem_malloc can return NULL. */ mem_free(parser_str_buf, m_parser); /* Allocated buffer will be bigger than really needed. */ parser_str_buf = mem_malloc(2 + 1 + strlen(ptr) * 2, m_parser); if (parser_str_buf == NULL) return ("(parser_stringify: mem_malloc failed)"); *parser_str_buf = '\"'; for (mod = parser_str_buf + 1; *ptr != '\0'; ++mod, ++ptr) switch (*ptr) { case '\\': case '\"': case '\t': case '\n': *mod = '\\'; ++mod; /* FALLTHROUGH */ default: *mod = *ptr; } *mod = '\"'; *(mod + 1) = '\0'; return (parser_str_buf); }