1 /*
2  * libwebsockets - small server side websockets and web server implementation
3  *
4  * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 /* Do not treat - as a terminal character, so "my-token" is one token */
26 #define LWS_TOKENIZE_F_MINUS_NONTERM	(1 << 0)
27 /* Separately report aggregate colon-delimited tokens */
28 #define LWS_TOKENIZE_F_AGG_COLON	(1 << 1)
29 /* Enforce sequencing for a simple token , token , token ... list */
30 #define LWS_TOKENIZE_F_COMMA_SEP_LIST	(1 << 2)
31 /* Allow more characters in the tokens and less delimiters... default is
32  * only alphanumeric + underscore in tokens */
33 #define LWS_TOKENIZE_F_RFC7230_DELIMS	(1 << 3)
34 /* Do not treat . as a terminal character, so "warmcat.com" is one token */
35 #define LWS_TOKENIZE_F_DOT_NONTERM	(1 << 4)
36 /* If something starts looking like a float, like 1.2, force to be string token.
37  * This lets you receive dotted-quads like 192.168.0.1 as string tokens, and
38  * avoids illegal float format detection like 1.myserver.com */
39 #define LWS_TOKENIZE_F_NO_FLOATS	(1 << 5)
40 /* Instead of LWS_TOKZE_INTEGER, report integers as any other string token */
41 #define LWS_TOKENIZE_F_NO_INTEGERS	(1 << 6)
42 /* # makes the rest of the line a comment */
43 #define LWS_TOKENIZE_F_HASH_COMMENT	(1 << 7)
44 /* Do not treat / as a terminal character, so "multipart/related" is one token */
45 #define LWS_TOKENIZE_F_SLASH_NONTERM	(1 << 8)
46 /* Do not treat * as a terminal character, so "myfile*" is one token */
47 #define LWS_TOKENIZE_F_ASTERISK_NONTERM	(1 << 9)
48 /* Do not treat = as a terminal character, so "x=y" is one token */
49 #define LWS_TOKENIZE_F_EQUALS_NONTERM	(1 << 10)
50 
51 typedef enum {
52 
53 	LWS_TOKZE_ERRS			=  5, /* the number of errors defined */
54 
55 	LWS_TOKZE_ERR_BROKEN_UTF8	= -5,	/* malformed or partial utf8 */
56 	LWS_TOKZE_ERR_UNTERM_STRING	= -4,	/* ended while we were in "" */
57 	LWS_TOKZE_ERR_MALFORMED_FLOAT	= -3,	/* like 0..1 or 0.1.1 */
58 	LWS_TOKZE_ERR_NUM_ON_LHS	= -2,	/* like 123= or 0.1= */
59 	LWS_TOKZE_ERR_COMMA_LIST	= -1,	/* like ",tok", or, "tok,," */
60 
61 	LWS_TOKZE_ENDED = 0,		/* no more content */
62 
63 	/* Note: results have ordinal 1+, EOT is 0 and errors are < 0 */
64 
65 	LWS_TOKZE_DELIMITER,		/* a delimiter appeared */
66 	LWS_TOKZE_TOKEN,		/* a token appeared */
67 	LWS_TOKZE_INTEGER,		/* an integer appeared */
68 	LWS_TOKZE_FLOAT,		/* a float appeared */
69 	LWS_TOKZE_TOKEN_NAME_EQUALS,	/* token [whitespace] = */
70 	LWS_TOKZE_TOKEN_NAME_COLON,	/* token [whitespace] : (only with
71 					   LWS_TOKENIZE_F_AGG_COLON flag) */
72 	LWS_TOKZE_QUOTED_STRING,	/* "*", where * may have any char */
73 
74 } lws_tokenize_elem;
75 
76 /*
77  * helper enums to allow caller to enforce legal delimiter sequencing, eg
78  * disallow "token,,token", "token,", and ",token"
79  */
80 
81 enum lws_tokenize_delimiter_tracking {
82 	LWSTZ_DT_NEED_FIRST_CONTENT,
83 	LWSTZ_DT_NEED_DELIM,
84 	LWSTZ_DT_NEED_NEXT_CONTENT,
85 };
86 
87 typedef struct lws_tokenize {
88 	const char *start; /**< set to the start of the string to tokenize */
89 	const char *token; /**< the start of an identified token or delimiter */
90 	size_t len;	/**< set to the length of the string to tokenize */
91 	size_t token_len;	/**< the length of the identied token or delimiter */
92 
93 	uint16_t flags;	/**< optional LWS_TOKENIZE_F_ flags, or 0 */
94 	uint8_t delim;
95 
96 	int8_t e; /**< convenient for storing lws_tokenize return */
97 } lws_tokenize_t;
98 
99 /**
100  * lws_tokenize() - breaks down a string into tokens and delimiters in-place
101  *
102  * \param ts: the lws_tokenize struct to init
103  * \param start: the string to tokenize
104  * \param flags: LWS_TOKENIZE_F_ option flags
105  *
106  * This initializes the tokenize struct to point to the given string, and
107  * sets the length to 2GiB - 1 (so there must be a terminating NUL)... you can
108  * override this requirement by setting ts.len yourself before using it.
109  *
110  * .delim is also initialized to LWSTZ_DT_NEED_FIRST_CONTENT.
111  */
112 
113 LWS_VISIBLE LWS_EXTERN void
114 lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags);
115 
116 /**
117  * lws_tokenize() - breaks down a string into tokens and delimiters in-place
118  *
119  * \param ts: the lws_tokenize struct with information and state on what to do
120  *
121  * The \p ts struct should have its start, len and flags members initialized to
122  * reflect the string to be tokenized and any options.
123  *
124  * Then `lws_tokenize()` may be called repeatedly on the struct, returning one
125  * of `lws_tokenize_elem` each time, and with the struct's `token` and
126  * `token_len` members set to describe the content of the delimiter or token
127  * payload each time.
128  *
129  * There are no allocations during the process.
130  *
131  * returns lws_tokenize_elem that was identified (LWS_TOKZE_ENDED means reached
132  * the end of the string).
133  */
134 
135 LWS_VISIBLE LWS_EXTERN lws_tokenize_elem
136 lws_tokenize(struct lws_tokenize *ts);
137 
138 /**
139  * lws_tokenize_cstr() - copy token string to NUL-terminated buffer
140  *
141  * \param ts: pointer to lws_tokenize struct to operate on
142  * \param str: destination buffer
143  * \pparam max: bytes in destination buffer
144  *
145  * returns 0 if OK or nonzero if the string + NUL won't fit.
146  */
147 
148 LWS_VISIBLE LWS_EXTERN int
149 lws_tokenize_cstr(struct lws_tokenize *ts, char *str, size_t max);
150 
151 
152 /*
153  * lws_strexp: flexible string expansion helper api
154  *
155  * This stateful helper can handle multiple separate input chunks and multiple
156  * output buffer loads with arbitrary boundaries between literals and expanded
157  * symbols.  This allows it to handle fragmented input as well as arbitrarily
158  * long symbol expansions that are bigger than the output buffer itself.
159  *
160  * A user callback is used to convert symbol names to the symbol value.
161  *
162  * A single byte buffer for input and another for output can process any
163  * length substitution then.  The state object is around 64 bytes on a 64-bit
164  * system and it only uses 8 bytes stack.
165  */
166 
167 
168 typedef int (*lws_strexp_expand_cb)(void *priv, const char *name, char *out,
169 				    size_t *pos, size_t olen, size_t *exp_ofs);
170 
171 typedef struct lws_strexp {
172 	char			name[32];
173 	lws_strexp_expand_cb	cb;
174 	void			*priv;
175 	char			*out;
176 	size_t			olen;
177 	size_t			pos;
178 
179 	size_t			exp_ofs;
180 
181 	uint8_t			name_pos;
182 	char			state;
183 } lws_strexp_t;
184 
185 enum {
186 	LSTRX_DONE,			/* it completed OK */
187 	LSTRX_FILLED_OUT,		/* out buf filled and needs resetting */
188 	LSTRX_FATAL_NAME_TOO_LONG = -1,	/* fatal */
189 	LSTRX_FATAL_NAME_UNKNOWN  = -2,
190 };
191 
192 
193 /**
194  * lws_strexp_init() - initialize an lws_strexp_t for use
195  *
196  * \p exp: the exp object to init
197  * \p priv: the user's object pointer to pass to callback
198  * \p cb: the callback to expand named objects
199  * \p out: the start of the output buffer, or NULL just to get the length
200  * \p olen: the length of the output buffer in bytes
201  *
202  * Prepares an lws_strexp_t for use and sets the initial output buffer
203  *
204  * If \p out is NULL, substitution proceeds normally, but no output is produced,
205  * only the length is returned.  olen should be set to the largest feasible
206  * overall length.  To use this mode, the substitution callback must also check
207  * for NULL \p out and avoid producing the output.
208  */
209 LWS_VISIBLE LWS_EXTERN void
210 lws_strexp_init(lws_strexp_t *exp, void *priv, lws_strexp_expand_cb cb,
211 		char *out, size_t olen);
212 
213 /**
214  * lws_strexp_reset_out() - reset the output buffer on an existing strexp
215  *
216  * \p exp: the exp object to init
217  * \p out: the start of the output buffer, or NULL to just get length
218  * \p olen: the length of the output buffer in bytes
219  *
220  * Provides a new output buffer for lws_strexp_expand() to continue to write
221  * into.  It can be the same as the old one if it has been copied out or used.
222  * The position of the next write will be reset to the start of the given buf.
223  *
224  * If \p out is NULL, substitution proceeds normally, but no output is produced,
225  * only the length is returned.  \p olen should be set to the largest feasible
226  * overall length.  To use this mode, the substitution callback must also check
227  * for NULL \p out and avoid producing the output.
228  */
229 LWS_VISIBLE LWS_EXTERN void
230 lws_strexp_reset_out(lws_strexp_t *exp, char *out, size_t olen);
231 
232 /**
233  * lws_strexp_expand() - copy / expand a string into the output buffer
234  *
235  * \p exp: the exp object for the copy / expansion
236  * \p in: the start of the next input data
237  * \p len: the length of the input data
238  * \p pused_in: pointer to write the amount of input used
239  * \p pused_out: pointer to write the amount of output used
240  *
241  * Copies in to the output buffer set in exp, expanding any ${name} tokens using
242  * the callback.  \p *pused_in is set to the number of input chars used and
243  * \p *pused_out the number of output characters used
244  *
245  * May return LSTRX_FILLED_OUT early with *pused < len if the output buffer is
246  * filled.  Handle the output buffer and reset it with lws_strexp_reset_out()
247  * before calling again with adjusted in / len to continue.
248  *
249  * In the case of large expansions, the expansion itself may fill the output
250  * buffer, in which case the expansion callback returns the LSTRX_FILLED_OUT
251  * and will be called again to continue with its *exp_ofs parameter set
252  * appropriately.
253  */
254 LWS_VISIBLE LWS_EXTERN int
255 lws_strexp_expand(lws_strexp_t *exp, const char *in, size_t len,
256 		  size_t *pused_in, size_t *pused_out);
257 
258 /**
259  * lws_strcmp_wildcard() - strcmp but the first arg can have wildcards
260  *
261  * \p wildcard: a string that may contain zero to three *, and may lack a NUL
262  * \p len: length of the wildcard string
263  * \p check: string to test to see if it matches wildcard
264  *
265  * Exactly like strcmp, but supports patterns like "a*", "a*b", "a*b*" etc
266  * where a and b are arbitrary substrings
267  */
268 LWS_VISIBLE LWS_EXTERN int
269 lws_strcmp_wildcard(const char *wildcard, size_t len, const char *check);
270