1*070c62a6SFranco Fichtner /* $Id: mdoc_argv.c,v 1.95 2014/07/06 19:09:00 schwarze Exp $ */ 280387638SSascha Wildner /* 336342e81SSascha Wildner * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4f88b6c16SFranco Fichtner * Copyright (c) 2012 Ingo Schwarze <schwarze@openbsd.org> 580387638SSascha Wildner * 680387638SSascha Wildner * Permission to use, copy, modify, and distribute this software for any 780387638SSascha Wildner * purpose with or without fee is hereby granted, provided that the above 880387638SSascha Wildner * copyright notice and this permission notice appear in all copies. 980387638SSascha Wildner * 1080387638SSascha Wildner * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 1180387638SSascha Wildner * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 1280387638SSascha Wildner * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 1380387638SSascha Wildner * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1480387638SSascha Wildner * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 1580387638SSascha Wildner * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 1680387638SSascha Wildner * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 1780387638SSascha Wildner */ 1880387638SSascha Wildner #ifdef HAVE_CONFIG_H 1980387638SSascha Wildner #include "config.h" 2080387638SSascha Wildner #endif 2180387638SSascha Wildner 2280387638SSascha Wildner #include <sys/types.h> 2380387638SSascha Wildner 2480387638SSascha Wildner #include <assert.h> 2580387638SSascha Wildner #include <stdlib.h> 2680387638SSascha Wildner #include <stdio.h> 2780387638SSascha Wildner #include <string.h> 2880387638SSascha Wildner 2960e1e752SSascha Wildner #include "mdoc.h" 3080387638SSascha Wildner #include "mandoc.h" 31*070c62a6SFranco Fichtner #include "mandoc_aux.h" 3280387638SSascha Wildner #include "libmdoc.h" 3380387638SSascha Wildner #include "libmandoc.h" 3480387638SSascha Wildner 3560e1e752SSascha Wildner #define MULTI_STEP 5 /* pre-allocate argument values */ 36a4c7eb57SSascha Wildner #define DELIMSZ 6 /* max possible size of a delimiter */ 37a4c7eb57SSascha Wildner 38a4c7eb57SSascha Wildner enum argsflag { 39a4c7eb57SSascha Wildner ARGSFL_NONE = 0, 40a4c7eb57SSascha Wildner ARGSFL_DELIM, /* handle delimiters of [[::delim::][ ]+]+ */ 41a4c7eb57SSascha Wildner ARGSFL_TABSEP /* handle tab/`Ta' separated phrases */ 42a4c7eb57SSascha Wildner }; 43a4c7eb57SSascha Wildner 44a4c7eb57SSascha Wildner enum argvflag { 45a4c7eb57SSascha Wildner ARGV_NONE, /* no args to flag (e.g., -split) */ 46a4c7eb57SSascha Wildner ARGV_SINGLE, /* one arg to flag (e.g., -file xxx) */ 47f88b6c16SFranco Fichtner ARGV_MULTI /* multiple args (e.g., -column xxx yyy) */ 48a4c7eb57SSascha Wildner }; 4980387638SSascha Wildner 5036342e81SSascha Wildner struct mdocarg { 5136342e81SSascha Wildner enum argsflag flags; 5236342e81SSascha Wildner const enum mdocargt *argvs; 5336342e81SSascha Wildner }; 5436342e81SSascha Wildner 5536342e81SSascha Wildner static void argn_free(struct mdoc_arg *, int); 5680387638SSascha Wildner static enum margserr args(struct mdoc *, int, int *, 57a4c7eb57SSascha Wildner char *, enum argsflag, char **); 58a4c7eb57SSascha Wildner static int args_checkpunct(const char *, int); 5936342e81SSascha Wildner static int argv_multi(struct mdoc *, int, 6080387638SSascha Wildner struct mdoc_argv *, int *, char *); 6136342e81SSascha Wildner static int argv_single(struct mdoc *, int, 6280387638SSascha Wildner struct mdoc_argv *, int *, char *); 6380387638SSascha Wildner 6460e1e752SSascha Wildner static const enum argvflag argvflags[MDOC_ARG_MAX] = { 6580387638SSascha Wildner ARGV_NONE, /* MDOC_Split */ 6680387638SSascha Wildner ARGV_NONE, /* MDOC_Nosplit */ 6780387638SSascha Wildner ARGV_NONE, /* MDOC_Ragged */ 6880387638SSascha Wildner ARGV_NONE, /* MDOC_Unfilled */ 6980387638SSascha Wildner ARGV_NONE, /* MDOC_Literal */ 7080387638SSascha Wildner ARGV_SINGLE, /* MDOC_File */ 71f88b6c16SFranco Fichtner ARGV_SINGLE, /* MDOC_Offset */ 7280387638SSascha Wildner ARGV_NONE, /* MDOC_Bullet */ 7380387638SSascha Wildner ARGV_NONE, /* MDOC_Dash */ 7480387638SSascha Wildner ARGV_NONE, /* MDOC_Hyphen */ 7580387638SSascha Wildner ARGV_NONE, /* MDOC_Item */ 7680387638SSascha Wildner ARGV_NONE, /* MDOC_Enum */ 7780387638SSascha Wildner ARGV_NONE, /* MDOC_Tag */ 7880387638SSascha Wildner ARGV_NONE, /* MDOC_Diag */ 7980387638SSascha Wildner ARGV_NONE, /* MDOC_Hang */ 8080387638SSascha Wildner ARGV_NONE, /* MDOC_Ohang */ 8180387638SSascha Wildner ARGV_NONE, /* MDOC_Inset */ 8280387638SSascha Wildner ARGV_MULTI, /* MDOC_Column */ 83f88b6c16SFranco Fichtner ARGV_SINGLE, /* MDOC_Width */ 8480387638SSascha Wildner ARGV_NONE, /* MDOC_Compact */ 8580387638SSascha Wildner ARGV_NONE, /* MDOC_Std */ 8680387638SSascha Wildner ARGV_NONE, /* MDOC_Filled */ 8780387638SSascha Wildner ARGV_NONE, /* MDOC_Words */ 8880387638SSascha Wildner ARGV_NONE, /* MDOC_Emphasis */ 8980387638SSascha Wildner ARGV_NONE, /* MDOC_Symbolic */ 9080387638SSascha Wildner ARGV_NONE /* MDOC_Symbolic */ 9180387638SSascha Wildner }; 9280387638SSascha Wildner 9360e1e752SSascha Wildner static const enum mdocargt args_Ex[] = { 9460e1e752SSascha Wildner MDOC_Std, 9560e1e752SSascha Wildner MDOC_ARG_MAX 9660e1e752SSascha Wildner }; 9760e1e752SSascha Wildner 9860e1e752SSascha Wildner static const enum mdocargt args_An[] = { 9960e1e752SSascha Wildner MDOC_Split, 10060e1e752SSascha Wildner MDOC_Nosplit, 10160e1e752SSascha Wildner MDOC_ARG_MAX 10260e1e752SSascha Wildner }; 10360e1e752SSascha Wildner 10460e1e752SSascha Wildner static const enum mdocargt args_Bd[] = { 10560e1e752SSascha Wildner MDOC_Ragged, 10660e1e752SSascha Wildner MDOC_Unfilled, 10760e1e752SSascha Wildner MDOC_Filled, 10860e1e752SSascha Wildner MDOC_Literal, 10960e1e752SSascha Wildner MDOC_File, 11060e1e752SSascha Wildner MDOC_Offset, 11160e1e752SSascha Wildner MDOC_Compact, 11260e1e752SSascha Wildner MDOC_Centred, 11360e1e752SSascha Wildner MDOC_ARG_MAX 11460e1e752SSascha Wildner }; 11560e1e752SSascha Wildner 11660e1e752SSascha Wildner static const enum mdocargt args_Bf[] = { 11760e1e752SSascha Wildner MDOC_Emphasis, 11860e1e752SSascha Wildner MDOC_Literal, 11960e1e752SSascha Wildner MDOC_Symbolic, 12060e1e752SSascha Wildner MDOC_ARG_MAX 12160e1e752SSascha Wildner }; 12260e1e752SSascha Wildner 12360e1e752SSascha Wildner static const enum mdocargt args_Bk[] = { 12460e1e752SSascha Wildner MDOC_Words, 12560e1e752SSascha Wildner MDOC_ARG_MAX 12660e1e752SSascha Wildner }; 12760e1e752SSascha Wildner 12860e1e752SSascha Wildner static const enum mdocargt args_Bl[] = { 12960e1e752SSascha Wildner MDOC_Bullet, 13060e1e752SSascha Wildner MDOC_Dash, 13160e1e752SSascha Wildner MDOC_Hyphen, 13260e1e752SSascha Wildner MDOC_Item, 13360e1e752SSascha Wildner MDOC_Enum, 13460e1e752SSascha Wildner MDOC_Tag, 13560e1e752SSascha Wildner MDOC_Diag, 13660e1e752SSascha Wildner MDOC_Hang, 13760e1e752SSascha Wildner MDOC_Ohang, 13860e1e752SSascha Wildner MDOC_Inset, 13960e1e752SSascha Wildner MDOC_Column, 14060e1e752SSascha Wildner MDOC_Width, 14160e1e752SSascha Wildner MDOC_Offset, 14260e1e752SSascha Wildner MDOC_Compact, 14360e1e752SSascha Wildner MDOC_Nested, 14460e1e752SSascha Wildner MDOC_ARG_MAX 14560e1e752SSascha Wildner }; 14680387638SSascha Wildner 14736342e81SSascha Wildner static const struct mdocarg mdocargs[MDOC_MAX] = { 148f88b6c16SFranco Fichtner { ARGSFL_DELIM, NULL }, /* Ap */ 14936342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Dd */ 15036342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Dt */ 15136342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Os */ 15236342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Sh */ 15336342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Ss */ 15436342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Pp */ 15536342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* D1 */ 15636342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Dl */ 15736342e81SSascha Wildner { ARGSFL_NONE, args_Bd }, /* Bd */ 15836342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Ed */ 15936342e81SSascha Wildner { ARGSFL_NONE, args_Bl }, /* Bl */ 16036342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* El */ 16136342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* It */ 16236342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Ad */ 16336342e81SSascha Wildner { ARGSFL_DELIM, args_An }, /* An */ 16436342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Ar */ 165f88b6c16SFranco Fichtner { ARGSFL_DELIM, NULL }, /* Cd */ 16636342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Cm */ 16736342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Dv */ 16836342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Er */ 16936342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Ev */ 17036342e81SSascha Wildner { ARGSFL_NONE, args_Ex }, /* Ex */ 17136342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Fa */ 17236342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Fd */ 17336342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Fl */ 17436342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Fn */ 17536342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Ft */ 17636342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Ic */ 177f88b6c16SFranco Fichtner { ARGSFL_DELIM, NULL }, /* In */ 17836342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Li */ 17936342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Nd */ 18036342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Nm */ 18136342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Op */ 182*070c62a6SFranco Fichtner { ARGSFL_DELIM, NULL }, /* Ot */ 18336342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Pa */ 18436342e81SSascha Wildner { ARGSFL_NONE, args_Ex }, /* Rv */ 18536342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* St */ 18636342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Va */ 18736342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Vt */ 18836342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Xr */ 18936342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* %A */ 19036342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* %B */ 19136342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* %D */ 19236342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* %I */ 19336342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* %J */ 19436342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* %N */ 19536342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* %O */ 19636342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* %P */ 19736342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* %R */ 19836342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* %T */ 19936342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* %V */ 20036342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Ac */ 20136342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Ao */ 20236342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Aq */ 20336342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* At */ 20436342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Bc */ 20536342e81SSascha Wildner { ARGSFL_NONE, args_Bf }, /* Bf */ 20636342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Bo */ 20736342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Bq */ 20836342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Bsx */ 20936342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Bx */ 21036342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Db */ 21136342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Dc */ 21236342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Do */ 21336342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Dq */ 21436342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Ec */ 21536342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Ef */ 21636342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Em */ 21736342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Eo */ 21836342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Fx */ 21936342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Ms */ 22036342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* No */ 22136342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Ns */ 22236342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Nx */ 22336342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Ox */ 22436342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Pc */ 22536342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Pf */ 22636342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Po */ 22736342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Pq */ 22836342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Qc */ 22936342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Ql */ 23036342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Qo */ 23136342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Qq */ 23236342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Re */ 23336342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Rs */ 23436342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Sc */ 23536342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* So */ 23636342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Sq */ 23736342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Sm */ 23836342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Sx */ 23936342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Sy */ 24036342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Tn */ 24136342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Ux */ 24236342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Xc */ 24336342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Xo */ 24436342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Fo */ 245f88b6c16SFranco Fichtner { ARGSFL_DELIM, NULL }, /* Fc */ 24636342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Oo */ 24736342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Oc */ 24836342e81SSascha Wildner { ARGSFL_NONE, args_Bk }, /* Bk */ 24936342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Ek */ 25036342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Bt */ 25136342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Hf */ 252*070c62a6SFranco Fichtner { ARGSFL_DELIM, NULL }, /* Fr */ 25336342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Ud */ 254f88b6c16SFranco Fichtner { ARGSFL_DELIM, NULL }, /* Lb */ 25536342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Lp */ 25636342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Lk */ 25736342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Mt */ 25836342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Brq */ 25936342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Bro */ 26036342e81SSascha Wildner { ARGSFL_DELIM, NULL }, /* Brc */ 26136342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* %C */ 26236342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Es */ 263*070c62a6SFranco Fichtner { ARGSFL_DELIM, NULL }, /* En */ 2647888c61dSFranco Fichtner { ARGSFL_DELIM, NULL }, /* Dx */ 26536342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* %Q */ 26636342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* br */ 26736342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* sp */ 26836342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* %U */ 26936342e81SSascha Wildner { ARGSFL_NONE, NULL }, /* Ta */ 270*070c62a6SFranco Fichtner { ARGSFL_NONE, NULL }, /* ll */ 27136342e81SSascha Wildner }; 27236342e81SSascha Wildner 27336342e81SSascha Wildner 27480387638SSascha Wildner /* 27580387638SSascha Wildner * Parse an argument from line text. This comes in the form of -key 27680387638SSascha Wildner * [value0...], which may either have a single mandatory value, at least 27780387638SSascha Wildner * one mandatory value, an optional single value, or no value. 27880387638SSascha Wildner */ 27980387638SSascha Wildner enum margverr 280f88b6c16SFranco Fichtner mdoc_argv(struct mdoc *mdoc, int line, enum mdoct tok, 28180387638SSascha Wildner struct mdoc_arg **v, int *pos, char *buf) 28280387638SSascha Wildner { 28380387638SSascha Wildner char *p, sv; 28480387638SSascha Wildner struct mdoc_argv tmp; 28580387638SSascha Wildner struct mdoc_arg *arg; 28636342e81SSascha Wildner const enum mdocargt *ap; 28780387638SSascha Wildner 28880387638SSascha Wildner if ('\0' == buf[*pos]) 28980387638SSascha Wildner return(ARGV_EOLN); 29036342e81SSascha Wildner else if (NULL == (ap = mdocargs[tok].argvs)) 29136342e81SSascha Wildner return(ARGV_WORD); 29236342e81SSascha Wildner else if ('-' != buf[*pos]) 29336342e81SSascha Wildner return(ARGV_WORD); 29480387638SSascha Wildner 29536342e81SSascha Wildner /* Seek to the first unescaped space. */ 29680387638SSascha Wildner 29780387638SSascha Wildner p = &buf[++(*pos)]; 29880387638SSascha Wildner 29980387638SSascha Wildner assert(*pos > 0); 30080387638SSascha Wildner 30136342e81SSascha Wildner for ( ; buf[*pos] ; (*pos)++) 30236342e81SSascha Wildner if (' ' == buf[*pos] && '\\' != buf[*pos - 1]) 30380387638SSascha Wildner break; 30480387638SSascha Wildner 30536342e81SSascha Wildner /* 30636342e81SSascha Wildner * We want to nil-terminate the word to look it up (it's easier 30736342e81SSascha Wildner * that way). But we may not have a flag, in which case we need 30836342e81SSascha Wildner * to restore the line as-is. So keep around the stray byte, 30936342e81SSascha Wildner * which we'll reset upon exiting (if necessary). 31036342e81SSascha Wildner */ 31180387638SSascha Wildner 31236342e81SSascha Wildner if ('\0' != (sv = buf[*pos])) 31380387638SSascha Wildner buf[(*pos)++] = '\0'; 31436342e81SSascha Wildner 31536342e81SSascha Wildner /* 31636342e81SSascha Wildner * Now look up the word as a flag. Use temporary storage that 31736342e81SSascha Wildner * we'll copy into the node's flags, if necessary. 31836342e81SSascha Wildner */ 31980387638SSascha Wildner 32060e1e752SSascha Wildner memset(&tmp, 0, sizeof(struct mdoc_argv)); 32136342e81SSascha Wildner 32280387638SSascha Wildner tmp.line = line; 32380387638SSascha Wildner tmp.pos = *pos; 32436342e81SSascha Wildner tmp.arg = MDOC_ARG_MAX; 32580387638SSascha Wildner 32636342e81SSascha Wildner while (MDOC_ARG_MAX != (tmp.arg = *ap++)) 32736342e81SSascha Wildner if (0 == strcmp(p, mdoc_argnames[tmp.arg])) 32836342e81SSascha Wildner break; 32980387638SSascha Wildner 33036342e81SSascha Wildner if (MDOC_ARG_MAX == tmp.arg) { 33136342e81SSascha Wildner /* 33236342e81SSascha Wildner * The flag was not found. 33336342e81SSascha Wildner * Restore saved zeroed byte and return as a word. 33436342e81SSascha Wildner */ 33580387638SSascha Wildner if (sv) 33680387638SSascha Wildner buf[*pos - 1] = sv; 33780387638SSascha Wildner return(ARGV_WORD); 33880387638SSascha Wildner } 33980387638SSascha Wildner 34036342e81SSascha Wildner /* Read to the next word (the argument). */ 34136342e81SSascha Wildner 34280387638SSascha Wildner while (buf[*pos] && ' ' == buf[*pos]) 34380387638SSascha Wildner (*pos)++; 34480387638SSascha Wildner 34536342e81SSascha Wildner switch (argvflags[tmp.arg]) { 346*070c62a6SFranco Fichtner case ARGV_SINGLE: 347f88b6c16SFranco Fichtner if ( ! argv_single(mdoc, line, &tmp, pos, buf)) 34880387638SSascha Wildner return(ARGV_ERROR); 34936342e81SSascha Wildner break; 350*070c62a6SFranco Fichtner case ARGV_MULTI: 351f88b6c16SFranco Fichtner if ( ! argv_multi(mdoc, line, &tmp, pos, buf)) 35236342e81SSascha Wildner return(ARGV_ERROR); 35336342e81SSascha Wildner break; 354*070c62a6SFranco Fichtner case ARGV_NONE: 35536342e81SSascha Wildner break; 35636342e81SSascha Wildner } 35780387638SSascha Wildner 35880387638SSascha Wildner if (NULL == (arg = *v)) 35980387638SSascha Wildner arg = *v = mandoc_calloc(1, sizeof(struct mdoc_arg)); 36080387638SSascha Wildner 36180387638SSascha Wildner arg->argc++; 362*070c62a6SFranco Fichtner arg->argv = mandoc_reallocarray(arg->argv, 363*070c62a6SFranco Fichtner arg->argc, sizeof(struct mdoc_argv)); 36480387638SSascha Wildner 365*070c62a6SFranco Fichtner memcpy(&arg->argv[(int)arg->argc - 1], &tmp, 366*070c62a6SFranco Fichtner sizeof(struct mdoc_argv)); 36780387638SSascha Wildner 36880387638SSascha Wildner return(ARGV_ARG); 36980387638SSascha Wildner } 37080387638SSascha Wildner 37180387638SSascha Wildner void 37280387638SSascha Wildner mdoc_argv_free(struct mdoc_arg *p) 37380387638SSascha Wildner { 37480387638SSascha Wildner int i; 37580387638SSascha Wildner 37680387638SSascha Wildner if (NULL == p) 37780387638SSascha Wildner return; 37880387638SSascha Wildner 37980387638SSascha Wildner if (p->refcnt) { 38080387638SSascha Wildner --(p->refcnt); 38180387638SSascha Wildner if (p->refcnt) 38280387638SSascha Wildner return; 38380387638SSascha Wildner } 38480387638SSascha Wildner assert(p->argc); 38580387638SSascha Wildner 38680387638SSascha Wildner for (i = (int)p->argc - 1; i >= 0; i--) 38760e1e752SSascha Wildner argn_free(p, i); 38880387638SSascha Wildner 38980387638SSascha Wildner free(p->argv); 39080387638SSascha Wildner free(p); 39180387638SSascha Wildner } 39280387638SSascha Wildner 39360e1e752SSascha Wildner static void 39460e1e752SSascha Wildner argn_free(struct mdoc_arg *p, int iarg) 39580387638SSascha Wildner { 39680387638SSascha Wildner struct mdoc_argv *arg; 39780387638SSascha Wildner int j; 39880387638SSascha Wildner 39980387638SSascha Wildner arg = &p->argv[iarg]; 40080387638SSascha Wildner 40180387638SSascha Wildner if (arg->sz && arg->value) { 40280387638SSascha Wildner for (j = (int)arg->sz - 1; j >= 0; j--) 40380387638SSascha Wildner free(arg->value[j]); 40480387638SSascha Wildner free(arg->value); 40580387638SSascha Wildner } 40680387638SSascha Wildner 40780387638SSascha Wildner for (--p->argc; iarg < (int)p->argc; iarg++) 40880387638SSascha Wildner p->argv[iarg] = p->argv[iarg+1]; 40980387638SSascha Wildner } 41080387638SSascha Wildner 41180387638SSascha Wildner enum margserr 412f88b6c16SFranco Fichtner mdoc_zargs(struct mdoc *mdoc, int line, int *pos, char *buf, char **v) 41380387638SSascha Wildner { 41480387638SSascha Wildner 415f88b6c16SFranco Fichtner return(args(mdoc, line, pos, buf, ARGSFL_NONE, v)); 41680387638SSascha Wildner } 41780387638SSascha Wildner 41880387638SSascha Wildner enum margserr 419f88b6c16SFranco Fichtner mdoc_args(struct mdoc *mdoc, int line, int *pos, 42080387638SSascha Wildner char *buf, enum mdoct tok, char **v) 42180387638SSascha Wildner { 422a4c7eb57SSascha Wildner enum argsflag fl; 42380387638SSascha Wildner struct mdoc_node *n; 42480387638SSascha Wildner 42536342e81SSascha Wildner fl = mdocargs[tok].flags; 42680387638SSascha Wildner 42780387638SSascha Wildner if (MDOC_It != tok) 428f88b6c16SFranco Fichtner return(args(mdoc, line, pos, buf, fl, v)); 42980387638SSascha Wildner 43080387638SSascha Wildner /* 43180387638SSascha Wildner * We know that we're in an `It', so it's reasonable to expect 43280387638SSascha Wildner * us to be sitting in a `Bl'. Someday this may not be the case 43380387638SSascha Wildner * (if we allow random `It's sitting out there), so provide a 43480387638SSascha Wildner * safe fall-back into the default behaviour. 43580387638SSascha Wildner */ 43680387638SSascha Wildner 437f88b6c16SFranco Fichtner for (n = mdoc->last; n; n = n->parent) 43880387638SSascha Wildner if (MDOC_Bl == n->tok) 439a4c7eb57SSascha Wildner if (LIST_column == n->norm->Bl.type) { 440a4c7eb57SSascha Wildner fl = ARGSFL_TABSEP; 44180387638SSascha Wildner break; 44280387638SSascha Wildner } 44380387638SSascha Wildner 444f88b6c16SFranco Fichtner return(args(mdoc, line, pos, buf, fl, v)); 44580387638SSascha Wildner } 44680387638SSascha Wildner 44780387638SSascha Wildner static enum margserr 448f88b6c16SFranco Fichtner args(struct mdoc *mdoc, int line, int *pos, 449a4c7eb57SSascha Wildner char *buf, enum argsflag fl, char **v) 45080387638SSascha Wildner { 45180387638SSascha Wildner char *p, *pp; 4527888c61dSFranco Fichtner int pairs; 45380387638SSascha Wildner enum margserr rc; 45480387638SSascha Wildner 45580387638SSascha Wildner if ('\0' == buf[*pos]) { 456f88b6c16SFranco Fichtner if (MDOC_PPHRASE & mdoc->flags) 45780387638SSascha Wildner return(ARGS_EOLN); 45880387638SSascha Wildner /* 45980387638SSascha Wildner * If we're not in a partial phrase and the flag for 46080387638SSascha Wildner * being a phrase literal is still set, the punctuation 46180387638SSascha Wildner * is unterminated. 46280387638SSascha Wildner */ 463f88b6c16SFranco Fichtner if (MDOC_PHRASELIT & mdoc->flags) 464*070c62a6SFranco Fichtner mandoc_msg(MANDOCERR_ARG_QUOTE, 465*070c62a6SFranco Fichtner mdoc->parse, line, *pos, NULL); 46680387638SSascha Wildner 467f88b6c16SFranco Fichtner mdoc->flags &= ~MDOC_PHRASELIT; 46880387638SSascha Wildner return(ARGS_EOLN); 46980387638SSascha Wildner } 47080387638SSascha Wildner 47180387638SSascha Wildner *v = &buf[*pos]; 47280387638SSascha Wildner 473a4c7eb57SSascha Wildner if (ARGSFL_DELIM == fl) 474a4c7eb57SSascha Wildner if (args_checkpunct(buf, *pos)) 47560e1e752SSascha Wildner return(ARGS_PUNCT); 47680387638SSascha Wildner 47780387638SSascha Wildner /* 47880387638SSascha Wildner * First handle TABSEP items, restricted to `Bl -column'. This 47980387638SSascha Wildner * ignores conventional token parsing and instead uses tabs or 48080387638SSascha Wildner * `Ta' macros to separate phrases. Phrases are parsed again 48180387638SSascha Wildner * for arguments at a later phase. 48280387638SSascha Wildner */ 48380387638SSascha Wildner 484a4c7eb57SSascha Wildner if (ARGSFL_TABSEP == fl) { 48580387638SSascha Wildner /* Scan ahead to tab (can't be escaped). */ 48680387638SSascha Wildner p = strchr(*v, '\t'); 48780387638SSascha Wildner pp = NULL; 48880387638SSascha Wildner 48980387638SSascha Wildner /* Scan ahead to unescaped `Ta'. */ 490f88b6c16SFranco Fichtner if ( ! (MDOC_PHRASELIT & mdoc->flags)) 49180387638SSascha Wildner for (pp = *v; ; pp++) { 49280387638SSascha Wildner if (NULL == (pp = strstr(pp, "Ta"))) 49380387638SSascha Wildner break; 49480387638SSascha Wildner if (pp > *v && ' ' != *(pp - 1)) 49580387638SSascha Wildner continue; 49680387638SSascha Wildner if (' ' == *(pp + 2) || '\0' == *(pp + 2)) 49780387638SSascha Wildner break; 49880387638SSascha Wildner } 49980387638SSascha Wildner 50080387638SSascha Wildner /* By default, assume a phrase. */ 50180387638SSascha Wildner rc = ARGS_PHRASE; 50280387638SSascha Wildner 50380387638SSascha Wildner /* 50480387638SSascha Wildner * Adjust new-buffer position to be beyond delimiter 50580387638SSascha Wildner * mark (e.g., Ta -> end + 2). 50680387638SSascha Wildner */ 50780387638SSascha Wildner if (p && pp) { 50880387638SSascha Wildner *pos += pp < p ? 2 : 1; 50980387638SSascha Wildner rc = pp < p ? ARGS_PHRASE : ARGS_PPHRASE; 51080387638SSascha Wildner p = pp < p ? pp : p; 51180387638SSascha Wildner } else if (p && ! pp) { 51280387638SSascha Wildner rc = ARGS_PPHRASE; 51380387638SSascha Wildner *pos += 1; 51480387638SSascha Wildner } else if (pp && ! p) { 51580387638SSascha Wildner p = pp; 51680387638SSascha Wildner *pos += 2; 51780387638SSascha Wildner } else { 51880387638SSascha Wildner rc = ARGS_PEND; 51980387638SSascha Wildner p = strchr(*v, 0); 52080387638SSascha Wildner } 52180387638SSascha Wildner 52280387638SSascha Wildner /* Whitespace check for eoln case... */ 523a4c7eb57SSascha Wildner if ('\0' == *p && ' ' == *(p - 1)) 524*070c62a6SFranco Fichtner mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 525*070c62a6SFranco Fichtner line, *pos, NULL); 52680387638SSascha Wildner 52780387638SSascha Wildner *pos += (int)(p - *v); 52880387638SSascha Wildner 52980387638SSascha Wildner /* Strip delimiter's preceding whitespace. */ 53080387638SSascha Wildner pp = p - 1; 53180387638SSascha Wildner while (pp > *v && ' ' == *pp) { 53280387638SSascha Wildner if (pp > *v && '\\' == *(pp - 1)) 53380387638SSascha Wildner break; 53480387638SSascha Wildner pp--; 53580387638SSascha Wildner } 53680387638SSascha Wildner *(pp + 1) = 0; 53780387638SSascha Wildner 53880387638SSascha Wildner /* Strip delimiter's proceeding whitespace. */ 53980387638SSascha Wildner for (pp = &buf[*pos]; ' ' == *pp; pp++, (*pos)++) 54080387638SSascha Wildner /* Skip ahead. */ ; 54180387638SSascha Wildner 54280387638SSascha Wildner return(rc); 54380387638SSascha Wildner } 54480387638SSascha Wildner 54580387638SSascha Wildner /* 54680387638SSascha Wildner * Process a quoted literal. A quote begins with a double-quote 54780387638SSascha Wildner * and ends with a double-quote NOT preceded by a double-quote. 5487888c61dSFranco Fichtner * NUL-terminate the literal in place. 5497888c61dSFranco Fichtner * Collapse pairs of quotes inside quoted literals. 55080387638SSascha Wildner * Whitespace is NOT involved in literal termination. 55180387638SSascha Wildner */ 55280387638SSascha Wildner 553f88b6c16SFranco Fichtner if (MDOC_PHRASELIT & mdoc->flags || '\"' == buf[*pos]) { 554f88b6c16SFranco Fichtner if ( ! (MDOC_PHRASELIT & mdoc->flags)) 55580387638SSascha Wildner *v = &buf[++(*pos)]; 55680387638SSascha Wildner 557f88b6c16SFranco Fichtner if (MDOC_PPHRASE & mdoc->flags) 558f88b6c16SFranco Fichtner mdoc->flags |= MDOC_PHRASELIT; 55980387638SSascha Wildner 5607888c61dSFranco Fichtner pairs = 0; 56180387638SSascha Wildner for ( ; buf[*pos]; (*pos)++) { 5627888c61dSFranco Fichtner /* Move following text left after quoted quotes. */ 5637888c61dSFranco Fichtner if (pairs) 5647888c61dSFranco Fichtner buf[*pos - pairs] = buf[*pos]; 56580387638SSascha Wildner if ('\"' != buf[*pos]) 56680387638SSascha Wildner continue; 5677888c61dSFranco Fichtner /* Unquoted quotes end quoted args. */ 56880387638SSascha Wildner if ('\"' != buf[*pos + 1]) 56980387638SSascha Wildner break; 5707888c61dSFranco Fichtner /* Quoted quotes collapse. */ 5717888c61dSFranco Fichtner pairs++; 57280387638SSascha Wildner (*pos)++; 57380387638SSascha Wildner } 5747888c61dSFranco Fichtner if (pairs) 5757888c61dSFranco Fichtner buf[*pos - pairs] = '\0'; 57680387638SSascha Wildner 57780387638SSascha Wildner if ('\0' == buf[*pos]) { 578f88b6c16SFranco Fichtner if (MDOC_PPHRASE & mdoc->flags) 57980387638SSascha Wildner return(ARGS_QWORD); 580*070c62a6SFranco Fichtner mandoc_msg(MANDOCERR_ARG_QUOTE, 581*070c62a6SFranco Fichtner mdoc->parse, line, *pos, NULL); 58280387638SSascha Wildner return(ARGS_QWORD); 58380387638SSascha Wildner } 58480387638SSascha Wildner 585f88b6c16SFranco Fichtner mdoc->flags &= ~MDOC_PHRASELIT; 58680387638SSascha Wildner buf[(*pos)++] = '\0'; 58780387638SSascha Wildner 58880387638SSascha Wildner if ('\0' == buf[*pos]) 58980387638SSascha Wildner return(ARGS_QWORD); 59080387638SSascha Wildner 59180387638SSascha Wildner while (' ' == buf[*pos]) 59280387638SSascha Wildner (*pos)++; 59380387638SSascha Wildner 594a4c7eb57SSascha Wildner if ('\0' == buf[*pos]) 595*070c62a6SFranco Fichtner mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 596*070c62a6SFranco Fichtner line, *pos, NULL); 59780387638SSascha Wildner 59880387638SSascha Wildner return(ARGS_QWORD); 59980387638SSascha Wildner } 60080387638SSascha Wildner 601a4c7eb57SSascha Wildner p = &buf[*pos]; 602f88b6c16SFranco Fichtner *v = mandoc_getarg(mdoc->parse, &p, line, pos); 60380387638SSascha Wildner 60480387638SSascha Wildner return(ARGS_WORD); 60580387638SSascha Wildner } 60680387638SSascha Wildner 60760e1e752SSascha Wildner /* 60860e1e752SSascha Wildner * Check if the string consists only of space-separated closing 60960e1e752SSascha Wildner * delimiters. This is a bit of a dance: the first must be a close 61060e1e752SSascha Wildner * delimiter, but it may be followed by middle delimiters. Arbitrary 61160e1e752SSascha Wildner * whitespace may separate these tokens. 61260e1e752SSascha Wildner */ 61360e1e752SSascha Wildner static int 614a4c7eb57SSascha Wildner args_checkpunct(const char *buf, int i) 61560e1e752SSascha Wildner { 61660e1e752SSascha Wildner int j; 61760e1e752SSascha Wildner char dbuf[DELIMSZ]; 61860e1e752SSascha Wildner enum mdelim d; 61980387638SSascha Wildner 62060e1e752SSascha Wildner /* First token must be a close-delimiter. */ 62160e1e752SSascha Wildner 62260e1e752SSascha Wildner for (j = 0; buf[i] && ' ' != buf[i] && j < DELIMSZ; j++, i++) 62360e1e752SSascha Wildner dbuf[j] = buf[i]; 62460e1e752SSascha Wildner 62560e1e752SSascha Wildner if (DELIMSZ == j) 62660e1e752SSascha Wildner return(0); 62760e1e752SSascha Wildner 62860e1e752SSascha Wildner dbuf[j] = '\0'; 62960e1e752SSascha Wildner if (DELIM_CLOSE != mdoc_isdelim(dbuf)) 63060e1e752SSascha Wildner return(0); 63160e1e752SSascha Wildner 63260e1e752SSascha Wildner while (' ' == buf[i]) 63360e1e752SSascha Wildner i++; 63460e1e752SSascha Wildner 63560e1e752SSascha Wildner /* Remaining must NOT be open/none. */ 63660e1e752SSascha Wildner 63760e1e752SSascha Wildner while (buf[i]) { 63860e1e752SSascha Wildner j = 0; 63960e1e752SSascha Wildner while (buf[i] && ' ' != buf[i] && j < DELIMSZ) 64060e1e752SSascha Wildner dbuf[j++] = buf[i++]; 64160e1e752SSascha Wildner 64260e1e752SSascha Wildner if (DELIMSZ == j) 64360e1e752SSascha Wildner return(0); 64460e1e752SSascha Wildner 64560e1e752SSascha Wildner dbuf[j] = '\0'; 64660e1e752SSascha Wildner d = mdoc_isdelim(dbuf); 64760e1e752SSascha Wildner if (DELIM_NONE == d || DELIM_OPEN == d) 64860e1e752SSascha Wildner return(0); 64960e1e752SSascha Wildner 65060e1e752SSascha Wildner while (' ' == buf[i]) 65160e1e752SSascha Wildner i++; 65260e1e752SSascha Wildner } 65360e1e752SSascha Wildner 65460e1e752SSascha Wildner return('\0' == buf[i]); 65560e1e752SSascha Wildner } 65660e1e752SSascha Wildner 65780387638SSascha Wildner static int 658f88b6c16SFranco Fichtner argv_multi(struct mdoc *mdoc, int line, 65980387638SSascha Wildner struct mdoc_argv *v, int *pos, char *buf) 66080387638SSascha Wildner { 66180387638SSascha Wildner enum margserr ac; 66280387638SSascha Wildner char *p; 66380387638SSascha Wildner 66480387638SSascha Wildner for (v->sz = 0; ; v->sz++) { 66580387638SSascha Wildner if ('-' == buf[*pos]) 66680387638SSascha Wildner break; 667f88b6c16SFranco Fichtner ac = args(mdoc, line, pos, buf, ARGSFL_NONE, &p); 66880387638SSascha Wildner if (ARGS_ERROR == ac) 66980387638SSascha Wildner return(0); 67080387638SSascha Wildner else if (ARGS_EOLN == ac) 67180387638SSascha Wildner break; 67280387638SSascha Wildner 67380387638SSascha Wildner if (0 == v->sz % MULTI_STEP) 674*070c62a6SFranco Fichtner v->value = mandoc_reallocarray(v->value, 675*070c62a6SFranco Fichtner v->sz + MULTI_STEP, sizeof(char *)); 67680387638SSascha Wildner 67780387638SSascha Wildner v->value[(int)v->sz] = mandoc_strdup(p); 67880387638SSascha Wildner } 67980387638SSascha Wildner 68080387638SSascha Wildner return(1); 68180387638SSascha Wildner } 68280387638SSascha Wildner 68380387638SSascha Wildner static int 684f88b6c16SFranco Fichtner argv_single(struct mdoc *mdoc, int line, 68580387638SSascha Wildner struct mdoc_argv *v, int *pos, char *buf) 68680387638SSascha Wildner { 68780387638SSascha Wildner enum margserr ac; 68880387638SSascha Wildner char *p; 68980387638SSascha Wildner 690f88b6c16SFranco Fichtner ac = args(mdoc, line, pos, buf, ARGSFL_NONE, &p); 69180387638SSascha Wildner if (ARGS_ERROR == ac) 69280387638SSascha Wildner return(0); 69380387638SSascha Wildner if (ARGS_EOLN == ac) 69480387638SSascha Wildner return(1); 69580387638SSascha Wildner 69680387638SSascha Wildner v->sz = 1; 69780387638SSascha Wildner v->value = mandoc_malloc(sizeof(char *)); 69880387638SSascha Wildner v->value[0] = mandoc_strdup(p); 69980387638SSascha Wildner 70080387638SSascha Wildner return(1); 70180387638SSascha Wildner } 702