1*80387638SSascha Wildner /* $Id: mdoc_argv.c,v 1.62 2010/12/24 14:00:40 kristaps Exp $ */ 2*80387638SSascha Wildner /* 3*80387638SSascha Wildner * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> 4*80387638SSascha Wildner * 5*80387638SSascha Wildner * Permission to use, copy, modify, and distribute this software for any 6*80387638SSascha Wildner * purpose with or without fee is hereby granted, provided that the above 7*80387638SSascha Wildner * copyright notice and this permission notice appear in all copies. 8*80387638SSascha Wildner * 9*80387638SSascha Wildner * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10*80387638SSascha Wildner * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11*80387638SSascha Wildner * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12*80387638SSascha Wildner * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13*80387638SSascha Wildner * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14*80387638SSascha Wildner * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15*80387638SSascha Wildner * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16*80387638SSascha Wildner */ 17*80387638SSascha Wildner #ifdef HAVE_CONFIG_H 18*80387638SSascha Wildner #include "config.h" 19*80387638SSascha Wildner #endif 20*80387638SSascha Wildner 21*80387638SSascha Wildner #include <sys/types.h> 22*80387638SSascha Wildner 23*80387638SSascha Wildner #include <assert.h> 24*80387638SSascha Wildner #include <ctype.h> 25*80387638SSascha Wildner #include <stdlib.h> 26*80387638SSascha Wildner #include <stdio.h> 27*80387638SSascha Wildner #include <string.h> 28*80387638SSascha Wildner 29*80387638SSascha Wildner #include "mandoc.h" 30*80387638SSascha Wildner #include "libmdoc.h" 31*80387638SSascha Wildner #include "libmandoc.h" 32*80387638SSascha Wildner 33*80387638SSascha Wildner /* 34*80387638SSascha Wildner * Routines to parse arguments of macros. Arguments follow the syntax 35*80387638SSascha Wildner * of `-arg [val [valN...]]'. Arguments come in all types: quoted 36*80387638SSascha Wildner * arguments, multiple arguments per value, no-value arguments, etc. 37*80387638SSascha Wildner * 38*80387638SSascha Wildner * There's no limit to the number or arguments that may be allocated. 39*80387638SSascha Wildner */ 40*80387638SSascha Wildner 41*80387638SSascha Wildner #define ARGV_NONE (1 << 0) 42*80387638SSascha Wildner #define ARGV_SINGLE (1 << 1) 43*80387638SSascha Wildner #define ARGV_MULTI (1 << 2) 44*80387638SSascha Wildner #define ARGV_OPT_SINGLE (1 << 3) 45*80387638SSascha Wildner 46*80387638SSascha Wildner #define MULTI_STEP 5 47*80387638SSascha Wildner 48*80387638SSascha Wildner static enum mdocargt argv_a2arg(enum mdoct, const char *); 49*80387638SSascha Wildner static enum margserr args(struct mdoc *, int, int *, 50*80387638SSascha Wildner char *, int, char **); 51*80387638SSascha Wildner static int argv(struct mdoc *, int, 52*80387638SSascha Wildner struct mdoc_argv *, int *, char *); 53*80387638SSascha Wildner static int argv_single(struct mdoc *, int, 54*80387638SSascha Wildner struct mdoc_argv *, int *, char *); 55*80387638SSascha Wildner static int argv_opt_single(struct mdoc *, int, 56*80387638SSascha Wildner struct mdoc_argv *, int *, char *); 57*80387638SSascha Wildner static int argv_multi(struct mdoc *, int, 58*80387638SSascha Wildner struct mdoc_argv *, int *, char *); 59*80387638SSascha Wildner 60*80387638SSascha Wildner /* Per-argument flags. */ 61*80387638SSascha Wildner 62*80387638SSascha Wildner static int mdoc_argvflags[MDOC_ARG_MAX] = { 63*80387638SSascha Wildner ARGV_NONE, /* MDOC_Split */ 64*80387638SSascha Wildner ARGV_NONE, /* MDOC_Nosplit */ 65*80387638SSascha Wildner ARGV_NONE, /* MDOC_Ragged */ 66*80387638SSascha Wildner ARGV_NONE, /* MDOC_Unfilled */ 67*80387638SSascha Wildner ARGV_NONE, /* MDOC_Literal */ 68*80387638SSascha Wildner ARGV_SINGLE, /* MDOC_File */ 69*80387638SSascha Wildner ARGV_OPT_SINGLE, /* MDOC_Offset */ 70*80387638SSascha Wildner ARGV_NONE, /* MDOC_Bullet */ 71*80387638SSascha Wildner ARGV_NONE, /* MDOC_Dash */ 72*80387638SSascha Wildner ARGV_NONE, /* MDOC_Hyphen */ 73*80387638SSascha Wildner ARGV_NONE, /* MDOC_Item */ 74*80387638SSascha Wildner ARGV_NONE, /* MDOC_Enum */ 75*80387638SSascha Wildner ARGV_NONE, /* MDOC_Tag */ 76*80387638SSascha Wildner ARGV_NONE, /* MDOC_Diag */ 77*80387638SSascha Wildner ARGV_NONE, /* MDOC_Hang */ 78*80387638SSascha Wildner ARGV_NONE, /* MDOC_Ohang */ 79*80387638SSascha Wildner ARGV_NONE, /* MDOC_Inset */ 80*80387638SSascha Wildner ARGV_MULTI, /* MDOC_Column */ 81*80387638SSascha Wildner ARGV_SINGLE, /* MDOC_Width */ 82*80387638SSascha Wildner ARGV_NONE, /* MDOC_Compact */ 83*80387638SSascha Wildner ARGV_NONE, /* MDOC_Std */ 84*80387638SSascha Wildner ARGV_NONE, /* MDOC_Filled */ 85*80387638SSascha Wildner ARGV_NONE, /* MDOC_Words */ 86*80387638SSascha Wildner ARGV_NONE, /* MDOC_Emphasis */ 87*80387638SSascha Wildner ARGV_NONE, /* MDOC_Symbolic */ 88*80387638SSascha Wildner ARGV_NONE /* MDOC_Symbolic */ 89*80387638SSascha Wildner }; 90*80387638SSascha Wildner 91*80387638SSascha Wildner static int mdoc_argflags[MDOC_MAX] = { 92*80387638SSascha Wildner 0, /* Ap */ 93*80387638SSascha Wildner 0, /* Dd */ 94*80387638SSascha Wildner 0, /* Dt */ 95*80387638SSascha Wildner 0, /* Os */ 96*80387638SSascha Wildner 0, /* Sh */ 97*80387638SSascha Wildner 0, /* Ss */ 98*80387638SSascha Wildner 0, /* Pp */ 99*80387638SSascha Wildner ARGS_DELIM, /* D1 */ 100*80387638SSascha Wildner ARGS_DELIM, /* Dl */ 101*80387638SSascha Wildner 0, /* Bd */ 102*80387638SSascha Wildner 0, /* Ed */ 103*80387638SSascha Wildner 0, /* Bl */ 104*80387638SSascha Wildner 0, /* El */ 105*80387638SSascha Wildner 0, /* It */ 106*80387638SSascha Wildner ARGS_DELIM, /* Ad */ 107*80387638SSascha Wildner ARGS_DELIM, /* An */ 108*80387638SSascha Wildner ARGS_DELIM, /* Ar */ 109*80387638SSascha Wildner 0, /* Cd */ 110*80387638SSascha Wildner ARGS_DELIM, /* Cm */ 111*80387638SSascha Wildner ARGS_DELIM, /* Dv */ 112*80387638SSascha Wildner ARGS_DELIM, /* Er */ 113*80387638SSascha Wildner ARGS_DELIM, /* Ev */ 114*80387638SSascha Wildner 0, /* Ex */ 115*80387638SSascha Wildner ARGS_DELIM, /* Fa */ 116*80387638SSascha Wildner 0, /* Fd */ 117*80387638SSascha Wildner ARGS_DELIM, /* Fl */ 118*80387638SSascha Wildner ARGS_DELIM, /* Fn */ 119*80387638SSascha Wildner ARGS_DELIM, /* Ft */ 120*80387638SSascha Wildner ARGS_DELIM, /* Ic */ 121*80387638SSascha Wildner 0, /* In */ 122*80387638SSascha Wildner ARGS_DELIM, /* Li */ 123*80387638SSascha Wildner 0, /* Nd */ 124*80387638SSascha Wildner ARGS_DELIM, /* Nm */ 125*80387638SSascha Wildner ARGS_DELIM, /* Op */ 126*80387638SSascha Wildner 0, /* Ot */ 127*80387638SSascha Wildner ARGS_DELIM, /* Pa */ 128*80387638SSascha Wildner 0, /* Rv */ 129*80387638SSascha Wildner ARGS_DELIM, /* St */ 130*80387638SSascha Wildner ARGS_DELIM, /* Va */ 131*80387638SSascha Wildner ARGS_DELIM, /* Vt */ 132*80387638SSascha Wildner ARGS_DELIM, /* Xr */ 133*80387638SSascha Wildner 0, /* %A */ 134*80387638SSascha Wildner 0, /* %B */ 135*80387638SSascha Wildner 0, /* %D */ 136*80387638SSascha Wildner 0, /* %I */ 137*80387638SSascha Wildner 0, /* %J */ 138*80387638SSascha Wildner 0, /* %N */ 139*80387638SSascha Wildner 0, /* %O */ 140*80387638SSascha Wildner 0, /* %P */ 141*80387638SSascha Wildner 0, /* %R */ 142*80387638SSascha Wildner 0, /* %T */ 143*80387638SSascha Wildner 0, /* %V */ 144*80387638SSascha Wildner ARGS_DELIM, /* Ac */ 145*80387638SSascha Wildner 0, /* Ao */ 146*80387638SSascha Wildner ARGS_DELIM, /* Aq */ 147*80387638SSascha Wildner ARGS_DELIM, /* At */ 148*80387638SSascha Wildner ARGS_DELIM, /* Bc */ 149*80387638SSascha Wildner 0, /* Bf */ 150*80387638SSascha Wildner 0, /* Bo */ 151*80387638SSascha Wildner ARGS_DELIM, /* Bq */ 152*80387638SSascha Wildner ARGS_DELIM, /* Bsx */ 153*80387638SSascha Wildner ARGS_DELIM, /* Bx */ 154*80387638SSascha Wildner 0, /* Db */ 155*80387638SSascha Wildner ARGS_DELIM, /* Dc */ 156*80387638SSascha Wildner 0, /* Do */ 157*80387638SSascha Wildner ARGS_DELIM, /* Dq */ 158*80387638SSascha Wildner ARGS_DELIM, /* Ec */ 159*80387638SSascha Wildner 0, /* Ef */ 160*80387638SSascha Wildner ARGS_DELIM, /* Em */ 161*80387638SSascha Wildner 0, /* Eo */ 162*80387638SSascha Wildner ARGS_DELIM, /* Fx */ 163*80387638SSascha Wildner ARGS_DELIM, /* Ms */ 164*80387638SSascha Wildner ARGS_DELIM, /* No */ 165*80387638SSascha Wildner ARGS_DELIM, /* Ns */ 166*80387638SSascha Wildner ARGS_DELIM, /* Nx */ 167*80387638SSascha Wildner ARGS_DELIM, /* Ox */ 168*80387638SSascha Wildner ARGS_DELIM, /* Pc */ 169*80387638SSascha Wildner ARGS_DELIM, /* Pf */ 170*80387638SSascha Wildner 0, /* Po */ 171*80387638SSascha Wildner ARGS_DELIM, /* Pq */ 172*80387638SSascha Wildner ARGS_DELIM, /* Qc */ 173*80387638SSascha Wildner ARGS_DELIM, /* Ql */ 174*80387638SSascha Wildner 0, /* Qo */ 175*80387638SSascha Wildner ARGS_DELIM, /* Qq */ 176*80387638SSascha Wildner 0, /* Re */ 177*80387638SSascha Wildner 0, /* Rs */ 178*80387638SSascha Wildner ARGS_DELIM, /* Sc */ 179*80387638SSascha Wildner 0, /* So */ 180*80387638SSascha Wildner ARGS_DELIM, /* Sq */ 181*80387638SSascha Wildner 0, /* Sm */ 182*80387638SSascha Wildner ARGS_DELIM, /* Sx */ 183*80387638SSascha Wildner ARGS_DELIM, /* Sy */ 184*80387638SSascha Wildner ARGS_DELIM, /* Tn */ 185*80387638SSascha Wildner ARGS_DELIM, /* Ux */ 186*80387638SSascha Wildner ARGS_DELIM, /* Xc */ 187*80387638SSascha Wildner 0, /* Xo */ 188*80387638SSascha Wildner 0, /* Fo */ 189*80387638SSascha Wildner 0, /* Fc */ 190*80387638SSascha Wildner 0, /* Oo */ 191*80387638SSascha Wildner ARGS_DELIM, /* Oc */ 192*80387638SSascha Wildner 0, /* Bk */ 193*80387638SSascha Wildner 0, /* Ek */ 194*80387638SSascha Wildner 0, /* Bt */ 195*80387638SSascha Wildner 0, /* Hf */ 196*80387638SSascha Wildner 0, /* Fr */ 197*80387638SSascha Wildner 0, /* Ud */ 198*80387638SSascha Wildner 0, /* Lb */ 199*80387638SSascha Wildner 0, /* Lp */ 200*80387638SSascha Wildner ARGS_DELIM, /* Lk */ 201*80387638SSascha Wildner ARGS_DELIM, /* Mt */ 202*80387638SSascha Wildner ARGS_DELIM, /* Brq */ 203*80387638SSascha Wildner 0, /* Bro */ 204*80387638SSascha Wildner ARGS_DELIM, /* Brc */ 205*80387638SSascha Wildner 0, /* %C */ 206*80387638SSascha Wildner 0, /* Es */ 207*80387638SSascha Wildner 0, /* En */ 208*80387638SSascha Wildner 0, /* Dx */ 209*80387638SSascha Wildner 0, /* %Q */ 210*80387638SSascha Wildner 0, /* br */ 211*80387638SSascha Wildner 0, /* sp */ 212*80387638SSascha Wildner 0, /* %U */ 213*80387638SSascha Wildner 0, /* Ta */ 214*80387638SSascha Wildner }; 215*80387638SSascha Wildner 216*80387638SSascha Wildner 217*80387638SSascha Wildner /* 218*80387638SSascha Wildner * Parse an argument from line text. This comes in the form of -key 219*80387638SSascha Wildner * [value0...], which may either have a single mandatory value, at least 220*80387638SSascha Wildner * one mandatory value, an optional single value, or no value. 221*80387638SSascha Wildner */ 222*80387638SSascha Wildner enum margverr 223*80387638SSascha Wildner mdoc_argv(struct mdoc *m, int line, enum mdoct tok, 224*80387638SSascha Wildner struct mdoc_arg **v, int *pos, char *buf) 225*80387638SSascha Wildner { 226*80387638SSascha Wildner char *p, sv; 227*80387638SSascha Wildner struct mdoc_argv tmp; 228*80387638SSascha Wildner struct mdoc_arg *arg; 229*80387638SSascha Wildner 230*80387638SSascha Wildner if ('\0' == buf[*pos]) 231*80387638SSascha Wildner return(ARGV_EOLN); 232*80387638SSascha Wildner 233*80387638SSascha Wildner assert(' ' != buf[*pos]); 234*80387638SSascha Wildner 235*80387638SSascha Wildner /* Parse through to the first unescaped space. */ 236*80387638SSascha Wildner 237*80387638SSascha Wildner p = &buf[++(*pos)]; 238*80387638SSascha Wildner 239*80387638SSascha Wildner assert(*pos > 0); 240*80387638SSascha Wildner 241*80387638SSascha Wildner /* LINTED */ 242*80387638SSascha Wildner while (buf[*pos]) { 243*80387638SSascha Wildner if (' ' == buf[*pos]) 244*80387638SSascha Wildner if ('\\' != buf[*pos - 1]) 245*80387638SSascha Wildner break; 246*80387638SSascha Wildner (*pos)++; 247*80387638SSascha Wildner } 248*80387638SSascha Wildner 249*80387638SSascha Wildner /* XXX - save zeroed byte, if not an argument. */ 250*80387638SSascha Wildner 251*80387638SSascha Wildner sv = '\0'; 252*80387638SSascha Wildner if (buf[*pos]) { 253*80387638SSascha Wildner sv = buf[*pos]; 254*80387638SSascha Wildner buf[(*pos)++] = '\0'; 255*80387638SSascha Wildner } 256*80387638SSascha Wildner 257*80387638SSascha Wildner (void)memset(&tmp, 0, sizeof(struct mdoc_argv)); 258*80387638SSascha Wildner tmp.line = line; 259*80387638SSascha Wildner tmp.pos = *pos; 260*80387638SSascha Wildner 261*80387638SSascha Wildner /* See if our token accepts the argument. */ 262*80387638SSascha Wildner 263*80387638SSascha Wildner if (MDOC_ARG_MAX == (tmp.arg = argv_a2arg(tok, p))) { 264*80387638SSascha Wildner /* XXX - restore saved zeroed byte. */ 265*80387638SSascha Wildner if (sv) 266*80387638SSascha Wildner buf[*pos - 1] = sv; 267*80387638SSascha Wildner return(ARGV_WORD); 268*80387638SSascha Wildner } 269*80387638SSascha Wildner 270*80387638SSascha Wildner while (buf[*pos] && ' ' == buf[*pos]) 271*80387638SSascha Wildner (*pos)++; 272*80387638SSascha Wildner 273*80387638SSascha Wildner if ( ! argv(m, line, &tmp, pos, buf)) 274*80387638SSascha Wildner return(ARGV_ERROR); 275*80387638SSascha Wildner 276*80387638SSascha Wildner if (NULL == (arg = *v)) 277*80387638SSascha Wildner arg = *v = mandoc_calloc(1, sizeof(struct mdoc_arg)); 278*80387638SSascha Wildner 279*80387638SSascha Wildner arg->argc++; 280*80387638SSascha Wildner arg->argv = mandoc_realloc 281*80387638SSascha Wildner (arg->argv, arg->argc * sizeof(struct mdoc_argv)); 282*80387638SSascha Wildner 283*80387638SSascha Wildner (void)memcpy(&arg->argv[(int)arg->argc - 1], 284*80387638SSascha Wildner &tmp, sizeof(struct mdoc_argv)); 285*80387638SSascha Wildner 286*80387638SSascha Wildner return(ARGV_ARG); 287*80387638SSascha Wildner } 288*80387638SSascha Wildner 289*80387638SSascha Wildner 290*80387638SSascha Wildner void 291*80387638SSascha Wildner mdoc_argv_free(struct mdoc_arg *p) 292*80387638SSascha Wildner { 293*80387638SSascha Wildner int i; 294*80387638SSascha Wildner 295*80387638SSascha Wildner if (NULL == p) 296*80387638SSascha Wildner return; 297*80387638SSascha Wildner 298*80387638SSascha Wildner if (p->refcnt) { 299*80387638SSascha Wildner --(p->refcnt); 300*80387638SSascha Wildner if (p->refcnt) 301*80387638SSascha Wildner return; 302*80387638SSascha Wildner } 303*80387638SSascha Wildner assert(p->argc); 304*80387638SSascha Wildner 305*80387638SSascha Wildner for (i = (int)p->argc - 1; i >= 0; i--) 306*80387638SSascha Wildner mdoc_argn_free(p, i); 307*80387638SSascha Wildner 308*80387638SSascha Wildner free(p->argv); 309*80387638SSascha Wildner free(p); 310*80387638SSascha Wildner } 311*80387638SSascha Wildner 312*80387638SSascha Wildner 313*80387638SSascha Wildner void 314*80387638SSascha Wildner mdoc_argn_free(struct mdoc_arg *p, int iarg) 315*80387638SSascha Wildner { 316*80387638SSascha Wildner struct mdoc_argv *arg; 317*80387638SSascha Wildner int j; 318*80387638SSascha Wildner 319*80387638SSascha Wildner arg = &p->argv[iarg]; 320*80387638SSascha Wildner 321*80387638SSascha Wildner if (arg->sz && arg->value) { 322*80387638SSascha Wildner for (j = (int)arg->sz - 1; j >= 0; j--) 323*80387638SSascha Wildner free(arg->value[j]); 324*80387638SSascha Wildner free(arg->value); 325*80387638SSascha Wildner } 326*80387638SSascha Wildner 327*80387638SSascha Wildner for (--p->argc; iarg < (int)p->argc; iarg++) 328*80387638SSascha Wildner p->argv[iarg] = p->argv[iarg+1]; 329*80387638SSascha Wildner } 330*80387638SSascha Wildner 331*80387638SSascha Wildner 332*80387638SSascha Wildner enum margserr 333*80387638SSascha Wildner mdoc_zargs(struct mdoc *m, int line, int *pos, 334*80387638SSascha Wildner char *buf, int flags, char **v) 335*80387638SSascha Wildner { 336*80387638SSascha Wildner 337*80387638SSascha Wildner return(args(m, line, pos, buf, flags, v)); 338*80387638SSascha Wildner } 339*80387638SSascha Wildner 340*80387638SSascha Wildner 341*80387638SSascha Wildner enum margserr 342*80387638SSascha Wildner mdoc_args(struct mdoc *m, int line, int *pos, 343*80387638SSascha Wildner char *buf, enum mdoct tok, char **v) 344*80387638SSascha Wildner { 345*80387638SSascha Wildner int fl; 346*80387638SSascha Wildner struct mdoc_node *n; 347*80387638SSascha Wildner 348*80387638SSascha Wildner fl = mdoc_argflags[tok]; 349*80387638SSascha Wildner 350*80387638SSascha Wildner if (MDOC_It != tok) 351*80387638SSascha Wildner return(args(m, line, pos, buf, fl, v)); 352*80387638SSascha Wildner 353*80387638SSascha Wildner /* 354*80387638SSascha Wildner * We know that we're in an `It', so it's reasonable to expect 355*80387638SSascha Wildner * us to be sitting in a `Bl'. Someday this may not be the case 356*80387638SSascha Wildner * (if we allow random `It's sitting out there), so provide a 357*80387638SSascha Wildner * safe fall-back into the default behaviour. 358*80387638SSascha Wildner */ 359*80387638SSascha Wildner 360*80387638SSascha Wildner for (n = m->last; n; n = n->parent) 361*80387638SSascha Wildner if (MDOC_Bl == n->tok) 362*80387638SSascha Wildner break; 363*80387638SSascha Wildner 364*80387638SSascha Wildner if (n && LIST_column == n->norm->Bl.type) { 365*80387638SSascha Wildner fl |= ARGS_TABSEP; 366*80387638SSascha Wildner fl &= ~ARGS_DELIM; 367*80387638SSascha Wildner } 368*80387638SSascha Wildner 369*80387638SSascha Wildner return(args(m, line, pos, buf, fl, v)); 370*80387638SSascha Wildner } 371*80387638SSascha Wildner 372*80387638SSascha Wildner 373*80387638SSascha Wildner static enum margserr 374*80387638SSascha Wildner args(struct mdoc *m, int line, int *pos, 375*80387638SSascha Wildner char *buf, int fl, char **v) 376*80387638SSascha Wildner { 377*80387638SSascha Wildner int i; 378*80387638SSascha Wildner char *p, *pp; 379*80387638SSascha Wildner enum margserr rc; 380*80387638SSascha Wildner enum mdelim d; 381*80387638SSascha Wildner 382*80387638SSascha Wildner /* 383*80387638SSascha Wildner * Parse out the terms (like `val' in `.Xx -arg val' or simply 384*80387638SSascha Wildner * `.Xx val'), which can have all sorts of properties: 385*80387638SSascha Wildner * 386*80387638SSascha Wildner * ARGS_DELIM: use special handling if encountering trailing 387*80387638SSascha Wildner * delimiters in the form of [[::delim::][ ]+]+. 388*80387638SSascha Wildner * 389*80387638SSascha Wildner * ARGS_NOWARN: don't post warnings. This is only used when 390*80387638SSascha Wildner * re-parsing delimiters, as the warnings have already been 391*80387638SSascha Wildner * posted. 392*80387638SSascha Wildner * 393*80387638SSascha Wildner * ARGS_TABSEP: use special handling for tab/`Ta' separated 394*80387638SSascha Wildner * phrases like in `Bl -column'. 395*80387638SSascha Wildner */ 396*80387638SSascha Wildner 397*80387638SSascha Wildner assert(' ' != buf[*pos]); 398*80387638SSascha Wildner 399*80387638SSascha Wildner if ('\0' == buf[*pos]) { 400*80387638SSascha Wildner if (MDOC_PPHRASE & m->flags) 401*80387638SSascha Wildner return(ARGS_EOLN); 402*80387638SSascha Wildner /* 403*80387638SSascha Wildner * If we're not in a partial phrase and the flag for 404*80387638SSascha Wildner * being a phrase literal is still set, the punctuation 405*80387638SSascha Wildner * is unterminated. 406*80387638SSascha Wildner */ 407*80387638SSascha Wildner if (MDOC_PHRASELIT & m->flags) 408*80387638SSascha Wildner if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE)) 409*80387638SSascha Wildner return(ARGS_ERROR); 410*80387638SSascha Wildner 411*80387638SSascha Wildner m->flags &= ~MDOC_PHRASELIT; 412*80387638SSascha Wildner return(ARGS_EOLN); 413*80387638SSascha Wildner } 414*80387638SSascha Wildner 415*80387638SSascha Wildner /* 416*80387638SSascha Wildner * If the first character is a closing delimiter and we're to 417*80387638SSascha Wildner * look for delimited strings, then pass down the buffer seeing 418*80387638SSascha Wildner * if it follows the pattern of [[::delim::][ ]+]+. Note that 419*80387638SSascha Wildner * we ONLY care about closing delimiters. 420*80387638SSascha Wildner */ 421*80387638SSascha Wildner 422*80387638SSascha Wildner if ((fl & ARGS_DELIM) && DELIM_CLOSE == mdoc_iscdelim(buf[*pos])) { 423*80387638SSascha Wildner for (i = *pos; buf[i]; ) { 424*80387638SSascha Wildner d = mdoc_iscdelim(buf[i]); 425*80387638SSascha Wildner if (DELIM_NONE == d || DELIM_OPEN == d) 426*80387638SSascha Wildner break; 427*80387638SSascha Wildner i++; 428*80387638SSascha Wildner if ('\0' == buf[i] || ' ' != buf[i]) 429*80387638SSascha Wildner break; 430*80387638SSascha Wildner i++; 431*80387638SSascha Wildner while (buf[i] && ' ' == buf[i]) 432*80387638SSascha Wildner i++; 433*80387638SSascha Wildner } 434*80387638SSascha Wildner 435*80387638SSascha Wildner if ('\0' == buf[i]) { 436*80387638SSascha Wildner *v = &buf[*pos]; 437*80387638SSascha Wildner if (i && ' ' != buf[i - 1]) 438*80387638SSascha Wildner return(ARGS_PUNCT); 439*80387638SSascha Wildner if (ARGS_NOWARN & fl) 440*80387638SSascha Wildner return(ARGS_PUNCT); 441*80387638SSascha Wildner if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE)) 442*80387638SSascha Wildner return(ARGS_ERROR); 443*80387638SSascha Wildner return(ARGS_PUNCT); 444*80387638SSascha Wildner } 445*80387638SSascha Wildner } 446*80387638SSascha Wildner 447*80387638SSascha Wildner *v = &buf[*pos]; 448*80387638SSascha Wildner 449*80387638SSascha Wildner /* 450*80387638SSascha Wildner * First handle TABSEP items, restricted to `Bl -column'. This 451*80387638SSascha Wildner * ignores conventional token parsing and instead uses tabs or 452*80387638SSascha Wildner * `Ta' macros to separate phrases. Phrases are parsed again 453*80387638SSascha Wildner * for arguments at a later phase. 454*80387638SSascha Wildner */ 455*80387638SSascha Wildner 456*80387638SSascha Wildner if (ARGS_TABSEP & fl) { 457*80387638SSascha Wildner /* Scan ahead to tab (can't be escaped). */ 458*80387638SSascha Wildner p = strchr(*v, '\t'); 459*80387638SSascha Wildner pp = NULL; 460*80387638SSascha Wildner 461*80387638SSascha Wildner /* Scan ahead to unescaped `Ta'. */ 462*80387638SSascha Wildner if ( ! (MDOC_PHRASELIT & m->flags)) 463*80387638SSascha Wildner for (pp = *v; ; pp++) { 464*80387638SSascha Wildner if (NULL == (pp = strstr(pp, "Ta"))) 465*80387638SSascha Wildner break; 466*80387638SSascha Wildner if (pp > *v && ' ' != *(pp - 1)) 467*80387638SSascha Wildner continue; 468*80387638SSascha Wildner if (' ' == *(pp + 2) || '\0' == *(pp + 2)) 469*80387638SSascha Wildner break; 470*80387638SSascha Wildner } 471*80387638SSascha Wildner 472*80387638SSascha Wildner /* By default, assume a phrase. */ 473*80387638SSascha Wildner rc = ARGS_PHRASE; 474*80387638SSascha Wildner 475*80387638SSascha Wildner /* 476*80387638SSascha Wildner * Adjust new-buffer position to be beyond delimiter 477*80387638SSascha Wildner * mark (e.g., Ta -> end + 2). 478*80387638SSascha Wildner */ 479*80387638SSascha Wildner if (p && pp) { 480*80387638SSascha Wildner *pos += pp < p ? 2 : 1; 481*80387638SSascha Wildner rc = pp < p ? ARGS_PHRASE : ARGS_PPHRASE; 482*80387638SSascha Wildner p = pp < p ? pp : p; 483*80387638SSascha Wildner } else if (p && ! pp) { 484*80387638SSascha Wildner rc = ARGS_PPHRASE; 485*80387638SSascha Wildner *pos += 1; 486*80387638SSascha Wildner } else if (pp && ! p) { 487*80387638SSascha Wildner p = pp; 488*80387638SSascha Wildner *pos += 2; 489*80387638SSascha Wildner } else { 490*80387638SSascha Wildner rc = ARGS_PEND; 491*80387638SSascha Wildner p = strchr(*v, 0); 492*80387638SSascha Wildner } 493*80387638SSascha Wildner 494*80387638SSascha Wildner /* Whitespace check for eoln case... */ 495*80387638SSascha Wildner if ('\0' == *p && ' ' == *(p - 1) && ! (ARGS_NOWARN & fl)) 496*80387638SSascha Wildner if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE)) 497*80387638SSascha Wildner return(ARGS_ERROR); 498*80387638SSascha Wildner 499*80387638SSascha Wildner *pos += (int)(p - *v); 500*80387638SSascha Wildner 501*80387638SSascha Wildner /* Strip delimiter's preceding whitespace. */ 502*80387638SSascha Wildner pp = p - 1; 503*80387638SSascha Wildner while (pp > *v && ' ' == *pp) { 504*80387638SSascha Wildner if (pp > *v && '\\' == *(pp - 1)) 505*80387638SSascha Wildner break; 506*80387638SSascha Wildner pp--; 507*80387638SSascha Wildner } 508*80387638SSascha Wildner *(pp + 1) = 0; 509*80387638SSascha Wildner 510*80387638SSascha Wildner /* Strip delimiter's proceeding whitespace. */ 511*80387638SSascha Wildner for (pp = &buf[*pos]; ' ' == *pp; pp++, (*pos)++) 512*80387638SSascha Wildner /* Skip ahead. */ ; 513*80387638SSascha Wildner 514*80387638SSascha Wildner return(rc); 515*80387638SSascha Wildner } 516*80387638SSascha Wildner 517*80387638SSascha Wildner /* 518*80387638SSascha Wildner * Process a quoted literal. A quote begins with a double-quote 519*80387638SSascha Wildner * and ends with a double-quote NOT preceded by a double-quote. 520*80387638SSascha Wildner * Whitespace is NOT involved in literal termination. 521*80387638SSascha Wildner */ 522*80387638SSascha Wildner 523*80387638SSascha Wildner if (MDOC_PHRASELIT & m->flags || '\"' == buf[*pos]) { 524*80387638SSascha Wildner if ( ! (MDOC_PHRASELIT & m->flags)) 525*80387638SSascha Wildner *v = &buf[++(*pos)]; 526*80387638SSascha Wildner 527*80387638SSascha Wildner if (MDOC_PPHRASE & m->flags) 528*80387638SSascha Wildner m->flags |= MDOC_PHRASELIT; 529*80387638SSascha Wildner 530*80387638SSascha Wildner for ( ; buf[*pos]; (*pos)++) { 531*80387638SSascha Wildner if ('\"' != buf[*pos]) 532*80387638SSascha Wildner continue; 533*80387638SSascha Wildner if ('\"' != buf[*pos + 1]) 534*80387638SSascha Wildner break; 535*80387638SSascha Wildner (*pos)++; 536*80387638SSascha Wildner } 537*80387638SSascha Wildner 538*80387638SSascha Wildner if ('\0' == buf[*pos]) { 539*80387638SSascha Wildner if (ARGS_NOWARN & fl || MDOC_PPHRASE & m->flags) 540*80387638SSascha Wildner return(ARGS_QWORD); 541*80387638SSascha Wildner if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE)) 542*80387638SSascha Wildner return(ARGS_ERROR); 543*80387638SSascha Wildner return(ARGS_QWORD); 544*80387638SSascha Wildner } 545*80387638SSascha Wildner 546*80387638SSascha Wildner m->flags &= ~MDOC_PHRASELIT; 547*80387638SSascha Wildner buf[(*pos)++] = '\0'; 548*80387638SSascha Wildner 549*80387638SSascha Wildner if ('\0' == buf[*pos]) 550*80387638SSascha Wildner return(ARGS_QWORD); 551*80387638SSascha Wildner 552*80387638SSascha Wildner while (' ' == buf[*pos]) 553*80387638SSascha Wildner (*pos)++; 554*80387638SSascha Wildner 555*80387638SSascha Wildner if (0 == buf[*pos] && ! (ARGS_NOWARN & fl)) 556*80387638SSascha Wildner if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE)) 557*80387638SSascha Wildner return(ARGS_ERROR); 558*80387638SSascha Wildner 559*80387638SSascha Wildner return(ARGS_QWORD); 560*80387638SSascha Wildner } 561*80387638SSascha Wildner 562*80387638SSascha Wildner /* 563*80387638SSascha Wildner * A non-quoted term progresses until either the end of line or 564*80387638SSascha Wildner * a non-escaped whitespace. 565*80387638SSascha Wildner */ 566*80387638SSascha Wildner 567*80387638SSascha Wildner for ( ; buf[*pos]; (*pos)++) 568*80387638SSascha Wildner if (*pos && ' ' == buf[*pos] && '\\' != buf[*pos - 1]) 569*80387638SSascha Wildner break; 570*80387638SSascha Wildner 571*80387638SSascha Wildner if ('\0' == buf[*pos]) 572*80387638SSascha Wildner return(ARGS_WORD); 573*80387638SSascha Wildner 574*80387638SSascha Wildner buf[(*pos)++] = '\0'; 575*80387638SSascha Wildner 576*80387638SSascha Wildner while (' ' == buf[*pos]) 577*80387638SSascha Wildner (*pos)++; 578*80387638SSascha Wildner 579*80387638SSascha Wildner if ('\0' == buf[*pos] && ! (ARGS_NOWARN & fl)) 580*80387638SSascha Wildner if ( ! mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE)) 581*80387638SSascha Wildner return(ARGS_ERROR); 582*80387638SSascha Wildner 583*80387638SSascha Wildner return(ARGS_WORD); 584*80387638SSascha Wildner } 585*80387638SSascha Wildner 586*80387638SSascha Wildner 587*80387638SSascha Wildner static enum mdocargt 588*80387638SSascha Wildner argv_a2arg(enum mdoct tok, const char *p) 589*80387638SSascha Wildner { 590*80387638SSascha Wildner 591*80387638SSascha Wildner /* 592*80387638SSascha Wildner * Parse an argument identifier from its text. XXX - this 593*80387638SSascha Wildner * should really be table-driven to clarify the code. 594*80387638SSascha Wildner * 595*80387638SSascha Wildner * If you add an argument to the list, make sure that you 596*80387638SSascha Wildner * register it here with its one or more macros! 597*80387638SSascha Wildner */ 598*80387638SSascha Wildner 599*80387638SSascha Wildner switch (tok) { 600*80387638SSascha Wildner case (MDOC_An): 601*80387638SSascha Wildner if (0 == strcmp(p, "split")) 602*80387638SSascha Wildner return(MDOC_Split); 603*80387638SSascha Wildner else if (0 == strcmp(p, "nosplit")) 604*80387638SSascha Wildner return(MDOC_Nosplit); 605*80387638SSascha Wildner break; 606*80387638SSascha Wildner 607*80387638SSascha Wildner case (MDOC_Bd): 608*80387638SSascha Wildner if (0 == strcmp(p, "ragged")) 609*80387638SSascha Wildner return(MDOC_Ragged); 610*80387638SSascha Wildner else if (0 == strcmp(p, "unfilled")) 611*80387638SSascha Wildner return(MDOC_Unfilled); 612*80387638SSascha Wildner else if (0 == strcmp(p, "filled")) 613*80387638SSascha Wildner return(MDOC_Filled); 614*80387638SSascha Wildner else if (0 == strcmp(p, "literal")) 615*80387638SSascha Wildner return(MDOC_Literal); 616*80387638SSascha Wildner else if (0 == strcmp(p, "file")) 617*80387638SSascha Wildner return(MDOC_File); 618*80387638SSascha Wildner else if (0 == strcmp(p, "offset")) 619*80387638SSascha Wildner return(MDOC_Offset); 620*80387638SSascha Wildner else if (0 == strcmp(p, "compact")) 621*80387638SSascha Wildner return(MDOC_Compact); 622*80387638SSascha Wildner else if (0 == strcmp(p, "centered")) 623*80387638SSascha Wildner return(MDOC_Centred); 624*80387638SSascha Wildner break; 625*80387638SSascha Wildner 626*80387638SSascha Wildner case (MDOC_Bf): 627*80387638SSascha Wildner if (0 == strcmp(p, "emphasis")) 628*80387638SSascha Wildner return(MDOC_Emphasis); 629*80387638SSascha Wildner else if (0 == strcmp(p, "literal")) 630*80387638SSascha Wildner return(MDOC_Literal); 631*80387638SSascha Wildner else if (0 == strcmp(p, "symbolic")) 632*80387638SSascha Wildner return(MDOC_Symbolic); 633*80387638SSascha Wildner break; 634*80387638SSascha Wildner 635*80387638SSascha Wildner case (MDOC_Bk): 636*80387638SSascha Wildner if (0 == strcmp(p, "words")) 637*80387638SSascha Wildner return(MDOC_Words); 638*80387638SSascha Wildner break; 639*80387638SSascha Wildner 640*80387638SSascha Wildner case (MDOC_Bl): 641*80387638SSascha Wildner if (0 == strcmp(p, "bullet")) 642*80387638SSascha Wildner return(MDOC_Bullet); 643*80387638SSascha Wildner else if (0 == strcmp(p, "dash")) 644*80387638SSascha Wildner return(MDOC_Dash); 645*80387638SSascha Wildner else if (0 == strcmp(p, "hyphen")) 646*80387638SSascha Wildner return(MDOC_Hyphen); 647*80387638SSascha Wildner else if (0 == strcmp(p, "item")) 648*80387638SSascha Wildner return(MDOC_Item); 649*80387638SSascha Wildner else if (0 == strcmp(p, "enum")) 650*80387638SSascha Wildner return(MDOC_Enum); 651*80387638SSascha Wildner else if (0 == strcmp(p, "tag")) 652*80387638SSascha Wildner return(MDOC_Tag); 653*80387638SSascha Wildner else if (0 == strcmp(p, "diag")) 654*80387638SSascha Wildner return(MDOC_Diag); 655*80387638SSascha Wildner else if (0 == strcmp(p, "hang")) 656*80387638SSascha Wildner return(MDOC_Hang); 657*80387638SSascha Wildner else if (0 == strcmp(p, "ohang")) 658*80387638SSascha Wildner return(MDOC_Ohang); 659*80387638SSascha Wildner else if (0 == strcmp(p, "inset")) 660*80387638SSascha Wildner return(MDOC_Inset); 661*80387638SSascha Wildner else if (0 == strcmp(p, "column")) 662*80387638SSascha Wildner return(MDOC_Column); 663*80387638SSascha Wildner else if (0 == strcmp(p, "width")) 664*80387638SSascha Wildner return(MDOC_Width); 665*80387638SSascha Wildner else if (0 == strcmp(p, "offset")) 666*80387638SSascha Wildner return(MDOC_Offset); 667*80387638SSascha Wildner else if (0 == strcmp(p, "compact")) 668*80387638SSascha Wildner return(MDOC_Compact); 669*80387638SSascha Wildner else if (0 == strcmp(p, "nested")) 670*80387638SSascha Wildner return(MDOC_Nested); 671*80387638SSascha Wildner break; 672*80387638SSascha Wildner 673*80387638SSascha Wildner case (MDOC_Rv): 674*80387638SSascha Wildner /* FALLTHROUGH */ 675*80387638SSascha Wildner case (MDOC_Ex): 676*80387638SSascha Wildner if (0 == strcmp(p, "std")) 677*80387638SSascha Wildner return(MDOC_Std); 678*80387638SSascha Wildner break; 679*80387638SSascha Wildner default: 680*80387638SSascha Wildner break; 681*80387638SSascha Wildner } 682*80387638SSascha Wildner 683*80387638SSascha Wildner return(MDOC_ARG_MAX); 684*80387638SSascha Wildner } 685*80387638SSascha Wildner 686*80387638SSascha Wildner 687*80387638SSascha Wildner static int 688*80387638SSascha Wildner argv_multi(struct mdoc *m, int line, 689*80387638SSascha Wildner struct mdoc_argv *v, int *pos, char *buf) 690*80387638SSascha Wildner { 691*80387638SSascha Wildner enum margserr ac; 692*80387638SSascha Wildner char *p; 693*80387638SSascha Wildner 694*80387638SSascha Wildner for (v->sz = 0; ; v->sz++) { 695*80387638SSascha Wildner if ('-' == buf[*pos]) 696*80387638SSascha Wildner break; 697*80387638SSascha Wildner ac = args(m, line, pos, buf, 0, &p); 698*80387638SSascha Wildner if (ARGS_ERROR == ac) 699*80387638SSascha Wildner return(0); 700*80387638SSascha Wildner else if (ARGS_EOLN == ac) 701*80387638SSascha Wildner break; 702*80387638SSascha Wildner 703*80387638SSascha Wildner if (0 == v->sz % MULTI_STEP) 704*80387638SSascha Wildner v->value = mandoc_realloc(v->value, 705*80387638SSascha Wildner (v->sz + MULTI_STEP) * sizeof(char *)); 706*80387638SSascha Wildner 707*80387638SSascha Wildner v->value[(int)v->sz] = mandoc_strdup(p); 708*80387638SSascha Wildner } 709*80387638SSascha Wildner 710*80387638SSascha Wildner return(1); 711*80387638SSascha Wildner } 712*80387638SSascha Wildner 713*80387638SSascha Wildner 714*80387638SSascha Wildner static int 715*80387638SSascha Wildner argv_opt_single(struct mdoc *m, int line, 716*80387638SSascha Wildner struct mdoc_argv *v, int *pos, char *buf) 717*80387638SSascha Wildner { 718*80387638SSascha Wildner enum margserr ac; 719*80387638SSascha Wildner char *p; 720*80387638SSascha Wildner 721*80387638SSascha Wildner if ('-' == buf[*pos]) 722*80387638SSascha Wildner return(1); 723*80387638SSascha Wildner 724*80387638SSascha Wildner ac = args(m, line, pos, buf, 0, &p); 725*80387638SSascha Wildner if (ARGS_ERROR == ac) 726*80387638SSascha Wildner return(0); 727*80387638SSascha Wildner if (ARGS_EOLN == ac) 728*80387638SSascha Wildner return(1); 729*80387638SSascha Wildner 730*80387638SSascha Wildner v->sz = 1; 731*80387638SSascha Wildner v->value = mandoc_malloc(sizeof(char *)); 732*80387638SSascha Wildner v->value[0] = mandoc_strdup(p); 733*80387638SSascha Wildner 734*80387638SSascha Wildner return(1); 735*80387638SSascha Wildner } 736*80387638SSascha Wildner 737*80387638SSascha Wildner 738*80387638SSascha Wildner /* 739*80387638SSascha Wildner * Parse a single, mandatory value from the stream. 740*80387638SSascha Wildner */ 741*80387638SSascha Wildner static int 742*80387638SSascha Wildner argv_single(struct mdoc *m, int line, 743*80387638SSascha Wildner struct mdoc_argv *v, int *pos, char *buf) 744*80387638SSascha Wildner { 745*80387638SSascha Wildner int ppos; 746*80387638SSascha Wildner enum margserr ac; 747*80387638SSascha Wildner char *p; 748*80387638SSascha Wildner 749*80387638SSascha Wildner ppos = *pos; 750*80387638SSascha Wildner 751*80387638SSascha Wildner ac = args(m, line, pos, buf, 0, &p); 752*80387638SSascha Wildner if (ARGS_EOLN == ac) { 753*80387638SSascha Wildner mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTARGVCOUNT); 754*80387638SSascha Wildner return(0); 755*80387638SSascha Wildner } else if (ARGS_ERROR == ac) 756*80387638SSascha Wildner return(0); 757*80387638SSascha Wildner 758*80387638SSascha Wildner v->sz = 1; 759*80387638SSascha Wildner v->value = mandoc_malloc(sizeof(char *)); 760*80387638SSascha Wildner v->value[0] = mandoc_strdup(p); 761*80387638SSascha Wildner 762*80387638SSascha Wildner return(1); 763*80387638SSascha Wildner } 764*80387638SSascha Wildner 765*80387638SSascha Wildner 766*80387638SSascha Wildner /* 767*80387638SSascha Wildner * Determine rules for parsing arguments. Arguments can either accept 768*80387638SSascha Wildner * no parameters, an optional single parameter, one parameter, or 769*80387638SSascha Wildner * multiple parameters. 770*80387638SSascha Wildner */ 771*80387638SSascha Wildner static int 772*80387638SSascha Wildner argv(struct mdoc *mdoc, int line, 773*80387638SSascha Wildner struct mdoc_argv *v, int *pos, char *buf) 774*80387638SSascha Wildner { 775*80387638SSascha Wildner 776*80387638SSascha Wildner v->sz = 0; 777*80387638SSascha Wildner v->value = NULL; 778*80387638SSascha Wildner 779*80387638SSascha Wildner switch (mdoc_argvflags[v->arg]) { 780*80387638SSascha Wildner case (ARGV_SINGLE): 781*80387638SSascha Wildner return(argv_single(mdoc, line, v, pos, buf)); 782*80387638SSascha Wildner case (ARGV_MULTI): 783*80387638SSascha Wildner return(argv_multi(mdoc, line, v, pos, buf)); 784*80387638SSascha Wildner case (ARGV_OPT_SINGLE): 785*80387638SSascha Wildner return(argv_opt_single(mdoc, line, v, pos, buf)); 786*80387638SSascha Wildner default: 787*80387638SSascha Wildner /* ARGV_NONE */ 788*80387638SSascha Wildner break; 789*80387638SSascha Wildner } 790*80387638SSascha Wildner 791*80387638SSascha Wildner return(1); 792*80387638SSascha Wildner } 793