1 /* wordsplit - a word splitter
2    Copyright (C) 2009-2019 Sergey Poznyakoff
3 
4    This program is free software; you can redistribute it and/or modify it
5    under the terms of the GNU General Public License as published by the
6    Free Software Foundation; either version 3 of the License, or (at your
7    option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License along
15    with this program. If not, see <http://www.gnu.org/licenses/>. */
16 
17 #ifndef __WORDSPLIT_H
18 #define __WORDSPLIT_H
19 
20 #include <stddef.h>
21 
typedef struct wordsplit wordsplit_t;

/* Structure used to direct the splitting.  Members marked with [Input]
   can be defined before calling wordsplit(), those marked with [Output]
   provide return values when the function returns.  If neither mark is
   used, the member is internal and must not be used by the caller.

   In the comments below, the identifiers in parentheses indicate bits that
   must be set (or unset, if starting with !) in ws_flags (if starting with
   WRDSF_) or ws_options (if starting with WRDSO_) to initialize or use the
   given member.

   If not redefined explicitly, most of them are set to some reasonable
   default value upon entry to wordsplit(). */
struct wordsplit
{
  size_t ws_wordc;          /* [Output] Number of words in ws_wordv. */
  char **ws_wordv;          /* [Output] Array of parsed out words. */
  size_t ws_offs;           /* [Input] (WRDSF_DOOFFS) Number of initial
			       elements in ws_wordv to fill with NULLs. */
  size_t ws_wordn;          /* Number of elements ws_wordv can accommodate. */
  int ws_flags;             /* [Input] Flags passed to wordsplit. */
  int ws_options;           /* [Input] (WRDSF_OPTIONS)
			       Additional options. */
  size_t ws_maxwords;       /* [Input] (WRDSO_MAXWORDS) Return at most that
			       many words */
  size_t ws_wordi;          /* [Output] (WRDSF_INCREMENTAL) Total number of
			       words returned so far */

  const char *ws_delim;     /* [Input] (WRDSF_DELIM) Word delimiters. */
  const char *ws_comment;   /* [Input] (WRDSF_COMMENT) Comment characters. */
  const char *ws_escape[2]; /* [Input] (WRDSF_ESCAPE) Characters to be escaped
			       with backslash.  Indexed by WRDSX_WORD and
			       WRDSX_QUOTE. */
  const char *ws_namechar;  /* [Input] (WRDSO_NAMECHAR) Characters that can
			       be parts of a variable name, in addition to
			       alphanumerics and underscore. */

  void (*ws_alloc_die) (wordsplit_t *wsp);
			    /* [Input] (WRDSF_ALLOC_DIE) Function called when
			       out of memory.  Must not return. */
  void (*ws_error) (const char *, ...)
		   __attribute__ ((__format__ (__printf__, 1, 2)));
			    /* [Input] (WRDSF_ERROR) Function used for error
			       reporting */
  void (*ws_debug) (const char *, ...)
		   __attribute__ ((__format__ (__printf__, 1, 2)));
			    /* [Input] (WRDSF_DEBUG) Function used for debug
			       output. */
  const char **ws_env;      /* [Input] (WRDSF_ENV, !WRDSF_NOVAR) Array of
			       environment variables. */

  /* Temporary storage for environment variables.  It is initialized
     upon first assignment which occurs during the parsing process
     (e.g. ${x:=2}).  When this happens, all variables from ws_env are
     moved to ws_envbuf first, and the ws_envbuf address is assigned
     to ws_env.  From this moment on, all variable expansions are served
     from ws_envbuf. */
  char **ws_envbuf;         /* Storage for variables */
  size_t ws_envidx;         /* Index of first free slot */
  size_t ws_envsiz;         /* Size of the ws_envbuf array */

  char const **ws_paramv;   /* [Input] (WRDSO_PARAMV) User-supplied
			       positional parameters */
  size_t ws_paramc;         /* Number of positional parameters */

  /* Temporary storage for parameters.  Works similarly to ws_envbuf. */
  char **ws_parambuf;
  size_t ws_paramidx;
  size_t ws_paramsiz;

  int (*ws_getvar) (char **ret, const char *var, size_t len, void *clos);
			    /* [Input] (WRDSF_GETVAR, !WRDSF_NOVAR) Looks up
			       the name VAR (LEN bytes long) in the table of
			       variables and if found returns in memory
			       location pointed to by RET the value of that
			       variable.  Returns WRDSE_OK (0) on success,
			       and an error code (see WRDSE_* defines below)
			       on error.  User-specific errors can be returned
			       by storing the error diagnostic string in RET
			       and returning WRDSE_USERERR.
			       Whatever is stored in RET, it must be allocated
			       using malloc(3). */
  void *ws_closure;         /* [Input] (WRDSF_CLOSURE) Passed as the CLOS
			       argument to ws_getvar and ws_command. */
  int (*ws_command) (char **ret, const char *cmd, size_t len, char **argv,
		     void *clos);
			    /* [Input] (!WRDSF_NOCMD) Returns in the memory
			       location pointed to by RET the expansion of
			       the command CMD (LEN bytes long).  On input,
			       ARGV contains CMD split out to words.

			       See ws_getvar for a discussion of possible
			       return values. */

  const char *ws_input;     /* Input string (the S argument to wordsplit). */
  size_t ws_len;            /* Length of ws_input. */
  size_t ws_endp;           /* Points past the last processed byte in
			       ws_input. */
  int ws_errno;             /* [Output] Error code, if an error occurred. */
  char *ws_usererr;         /* Points to textual description of
			       the error, if ws_errno is WRDSE_USERERR.  Must
			       be allocated with malloc(3). */
  char *ws_errctx;          /* Context in which the error occurred:
			       For WRDSE_UNDEF - name of the undefined variable,
			       For WRDSE_GLOBERR - pattern that caused error.
			    */
  struct wordsplit_node *ws_head, *ws_tail;
			    /* Doubly-linked list of parsed out nodes. */
  char ws_sep[2];           /* Temporary storage used during splitting */
  int ws_lvl;               /* Invocation nesting level. */
};
134 
/* Initial size for ws_env, if allocated automatically */
#define WORDSPLIT_ENV_INIT 16
137 
/* Wordsplit flags.  Each flag occupies its own bit of ws_flags (and of
   the FLAGS argument of wordsplit()); together they use all 32 bits. */
/* Append the words found to the array resulting from a previous
   call. */
#define WRDSF_APPEND            0x00000001
/* Insert ws_offs initial NULLs in the array ws_wordv.
   (These are not counted in the returned ws_wordc.) */
#define WRDSF_DOOFFS            0x00000002
/* Don't do command substitution. */
#define WRDSF_NOCMD             0x00000004
/* The ws argument resulted from a previous call to
   wordsplit(), and wordsplit_free() was not called. Reuse the
   allocated storage. */
#define WRDSF_REUSE             0x00000008
/* Print errors */
#define WRDSF_SHOWERR           0x00000010
/* Consider it an error if an undefined variable is expanded. */
#define WRDSF_UNDEF             0x00000020
/* Don't do variable expansion. */
#define WRDSF_NOVAR             0x00000040
/* Abort on ENOMEM error */
#define WRDSF_ENOMEMABRT        0x00000080
/* Trim off any leading and trailing whitespace */
#define WRDSF_WS                0x00000100
/* Handle single quotes */
#define WRDSF_SQUOTE            0x00000200
/* Handle double quotes */
#define WRDSF_DQUOTE            0x00000400
/* Handle single and double quotes */
#define WRDSF_QUOTE             (WRDSF_SQUOTE|WRDSF_DQUOTE)
/* Replace each input sequence of repeated delimiters with a single
   delimiter */
#define WRDSF_SQUEEZE_DELIMS    0x00000800
/* Return delimiters */
#define WRDSF_RETURN_DELIMS     0x00001000
/* Treat sed expressions as words */
#define WRDSF_SED_EXPR          0x00002000
/* ws_delim field is initialized */
#define WRDSF_DELIM             0x00004000
/* ws_comment field is initialized */
#define WRDSF_COMMENT           0x00008000
/* ws_alloc_die field is initialized */
#define WRDSF_ALLOC_DIE         0x00010000
/* ws_error field is initialized */
#define WRDSF_ERROR             0x00020000
/* ws_debug field is initialized */
#define WRDSF_DEBUG             0x00040000
/* ws_env field is initialized */
#define WRDSF_ENV               0x00080000
/* ws_getvar field is initialized */
#define WRDSF_GETVAR            0x00100000
/* enable debugging */
#define WRDSF_SHOWDBG           0x00200000
/* Don't split input into words.  Useful for side effects. */
#define WRDSF_NOSPLIT           0x00400000
/* Keep undefined variables in place, instead of expanding them to
   empty strings. */
#define WRDSF_KEEPUNDEF         0x00800000
/* Warn about undefined variables */
#define WRDSF_WARNUNDEF         0x01000000
/* Handle C escapes */
#define WRDSF_CESCAPES          0x02000000
/* ws_closure is set */
#define WRDSF_CLOSURE           0x04000000
/* ws_env is a Key/Value environment, i.e. the value of a variable is
   stored in the element that follows its name. */
#define WRDSF_ENV_KV            0x08000000
/* ws_escape is set */
#define WRDSF_ESCAPE            0x10000000
/* Incremental mode */
#define WRDSF_INCREMENTAL       0x20000000
/* Perform pathname and tilde expansion */
#define WRDSF_PATHEXPAND        0x40000000
/* ws_options is initialized */
#define WRDSF_OPTIONS           0x80000000

/* Default flag set: no variable expansion, no command substitution,
   both quote styles handled, repeated delimiters squeezed, C escapes
   handled. */
#define WRDSF_DEFFLAGS	       \
  (WRDSF_NOVAR | WRDSF_NOCMD | \
   WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES)
216 
/* Wordsplit options: bits of ws_options, honored when WRDSF_OPTIONS is
   set in ws_flags. */
/* Remove the word that produces empty string after path expansion */
#define WRDSO_NULLGLOB        0x00000001
/* Print error message if path expansion produces empty string */
#define WRDSO_FAILGLOB        0x00000002
/* Allow a leading period to be matched by metacharacters. */
#define WRDSO_DOTGLOB         0x00000004
/* Prefer ws_getvar over lookup in ws_env, if both are supplied */
#define WRDSO_GETVARPREF      0x00000008
/* Keep backslash in unrecognized escape sequences in words */
#define WRDSO_BSKEEP_WORD     0x00000010
/* Handle octal escapes in words */
#define WRDSO_OESC_WORD       0x00000020
/* Handle hex escapes in words */
#define WRDSO_XESC_WORD       0x00000040

/* ws_maxwords field is initialized */
#define WRDSO_MAXWORDS        0x00000080

/* The three *_QUOTE bits below are the corresponding *_WORD bits
   shifted left by 4; WRDSO_ESC_SET and WRDSO_ESC_TEST rely on that. */
/* Keep backslash in unrecognized escape sequences in quoted strings */
#define WRDSO_BSKEEP_QUOTE    0x00000100
/* Handle octal escapes in quoted strings */
#define WRDSO_OESC_QUOTE      0x00000200
/* Handle hex escapes in quoted strings */
#define WRDSO_XESC_QUOTE      0x00000400
/* Unused: 0x00000800 */
/* Don't split variable references, even if they contain whitespace
   (e.g. ${VAR:-foo bar}) */
#define WRDSO_NOVARSPLIT     0x00001000
/* Don't split commands, even containing whitespace, e.g.
   $(echo foo bar) */
#define WRDSO_NOCMDSPLIT     0x00002000

/* Enable positional parameters */
#define WRDSO_PARAMV         0x00004000
/* Enable negative positional indices (${-1} is the last positional
   parameter) */
#define WRDSO_PARAM_NEGIDX   0x00008000
/* ws_namechar member is initialized */
#define WRDSO_NAMECHAR       0x00010000

/* Short names for the word-context escape options. */
#define WRDSO_BSKEEP          WRDSO_BSKEEP_WORD
#define WRDSO_OESC            WRDSO_OESC_WORD
#define WRDSO_XESC            WRDSO_XESC_WORD
260 
/* Indices into ws_escape */
#define WRDSX_WORD  0
#define WRDSX_QUOTE 1

/* Set escape option F in WS for words (Q==0) or quoted strings (Q==1).
   F is given as a word-context bit (WRDSO_BSKEEP, WRDSO_OESC or
   WRDSO_XESC); it is shifted left by 4*Q, which for Q == WRDSX_QUOTE
   yields the matching WRDSO_*_QUOTE bit (0x10 -> 0x100 and so on).
   WS must point to an object that has a ws_options member.  Each
   argument is evaluated exactly once. */
#define WRDSO_ESC_SET(ws,q,f) ((ws)->ws_options |= ((f) << (4 * (q))))
/* Test WS for escape option F for words (Q==0) or quoted strings (Q==1).
   Yields the masked bit (non-zero if set), not a normalized 0/1. */
#define WRDSO_ESC_TEST(ws,q,f) ((ws)->ws_options & ((f) << (4 * (q))))
269 
/* Error codes.  These are the values reported in ws_errno and the values
   the ws_getvar and ws_command callbacks return.  Only the meanings
   stated in the struct wordsplit member comments are repeated here. */
#define WRDSE_OK         0   /* Success */
#define WRDSE_EOF        WRDSE_OK
#define WRDSE_QUOTE      1
#define WRDSE_NOSPACE    2
#define WRDSE_USAGE      3
#define WRDSE_CBRACE     4
#define WRDSE_UNDEF      5   /* Undefined variable; its name is in
				ws_errctx */
#define WRDSE_NOINPUT    6
#define WRDSE_PAREN      7
#define WRDSE_GLOBERR    8   /* Pattern error; the pattern is in
				ws_errctx */
#define WRDSE_USERERR    9   /* User-specific error; its description is in
				ws_usererr */
#define WRDSE_BADPARAM  10
282 
283 int wordsplit (const char *s, wordsplit_t *ws, int flags);
284 int wordsplit_len (const char *s, size_t len, wordsplit_t *ws, int flags);
285 void wordsplit_free (wordsplit_t *ws);
286 void wordsplit_free_words (wordsplit_t *ws);
287 void wordsplit_free_envbuf (wordsplit_t *ws);
288 void wordsplit_free_parambuf (struct wordsplit *ws);
289 int wordsplit_get_words (wordsplit_t *ws, size_t *wordc, char ***wordv);
290 
291 static inline void wordsplit_getwords (wordsplit_t *ws, size_t *wordc, char ***wordv)
292   __attribute__ ((deprecated));
293 
294 static inline void
wordsplit_getwords(wordsplit_t * ws,size_t * wordc,char *** wordv)295 wordsplit_getwords (wordsplit_t *ws, size_t *wordc, char ***wordv)
296 {
297   wordsplit_get_words (ws, wordc, wordv);
298 }
299 
300 int wordsplit_append (wordsplit_t *wsp, int argc, char **argv);
301 
302 int wordsplit_c_unquote_char (int c);
303 int wordsplit_c_quote_char (int c);
304 size_t wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote);
305 void wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex);
306 
307 void wordsplit_perror (wordsplit_t *ws);
308 const char *wordsplit_strerror (wordsplit_t *ws);
309 
310 void wordsplit_clearerr (wordsplit_t *ws);
311 
312 #endif
313