1 /* vi: set sw=4 ts=4:
2 *
3 * split.c
4 *
 * Function that splits a string into arguments with quoting.
6 *
7 * by Nick Patavalis (npat@efault.net)
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
22 * USA
23 */
24
25 #include <stdlib.h>
26 #include <string.h>
27 #include <assert.h>
28
29 #include "split.h"
30
/* End-codes recorded by the lexer when it stops */
enum err_codes {
    ERR_OK             = 0, /* string lexed without error */
    ERR_BS_AT_EOS      = 1, /* string ends right after a backslash */
    ERR_SQ_OPEN_AT_EOS = 2, /* string ends with a single-quote still open */
    ERR_DQ_OPEN_AT_EOS = 3  /* string ends with a double-quote still open */
};
38
/* States of the lexer's state machine */
enum states {
    ST_DELIM = 0, /* between arguments, skipping delimiters */
    ST_QUOTE = 1, /* inside a single- or double-quoted section */
    ST_ARG   = 2, /* inside an argument, outside of quotes */
    ST_END   = 3  /* lexing has finished */
};
46
/* Characters that have a special meaning to the lexer */
#define BS  '\\'  /* backslash */
#define SQ  '\''  /* single quote */
#define DQ  '"'   /* double quote */
#define NL  '\n'  /* newline */
#define EOS '\0'  /* end of string */
53
/* Non-zero if "c" is an argument delimiter: space, tab or newline.
   The argument is evaluated more than once. */
#define is_delim(c) \
    ( ' ' == (c) || '\t' == (c) || '\n' == (c) )
56
/* Non-zero if "c" is one of the characters tested for after a
   backslash inside double quotes: backslash, double-quote, backquote
   or dollar. The argument is evaluated more than once. */
#define is_dq_escapable(c) \
    ( '\\' == (c) || '"' == (c) || '`' == (c) || '$' == (c) )
59
/* Short-hands used in split_quoted(). They expand in place and rely
   on its locals (arg_buff, ap, ae, c, flags) and its parameters. */

/* Finish the argument collected in arg_buff. While argv has a free
   slot, the argument is terminated, duplicated with strdup() and
   stored at argv[*argc], and *argc is incremented; otherwise the
   argument is discarded and SPLIT_DROP is flagged. In both cases the
   buffer is rewound for the next argument.
   NOTE: makes the enclosing function return -1 if strdup() fails. */
#define push() \
    do { \
        if ( *argc >= argv_sz ) { \
            flags |= SPLIT_DROP; \
        } else { \
            char *copy; \
            *ap = '\0'; \
            copy = strdup(arg_buff); \
            /* !! out of mem !! */ \
            if ( ! copy ) return -1; \
            argv[(*argc)++] = copy; \
        } \
        ap = arg_buff; \
    } while (0)
76
/* Append the current input character (*c) to the argument buffer.
   Once the write pointer has reached the end-guard the character is
   discarded and SPLIT_TRUNC is flagged instead. */
#define save() \
    do { \
        if ( ap == ae ) { \
            flags |= SPLIT_TRUNC; \
        } else { \
            *ap = *c; \
            ap++; \
        } \
    } while (0)
85
86 int
split_quoted(const char * s,int * argc,char * argv[],int argv_sz)87 split_quoted (const char *s, int *argc, char *argv[], int argv_sz)
88 {
89 char arg_buff[MAX_ARG_LEN]; /* current argument buffer */
90 char *ap, *ae; /* arg_buff current ptr & end-guard */
91 const char *c; /* current input charcter ptr */
92 char qc; /* current quote character */
93 enum states state; /* current state */
94 enum err_codes err; /* error end-code */
95 int flags; /* warning flags */
96
97 ap = &arg_buff[0];
98 ae = &arg_buff[MAX_ARG_LEN - 1];
99 c = &s[0];
100 state = ST_DELIM;
101 err = ERR_OK;
102 flags = 0;
103 qc = SQ; /* silence compiler waring */
104
105 while ( state != ST_END ) {
106 switch (state) {
107 case ST_DELIM:
108 while ( is_delim(*c) ) c++;
109 if ( *c == SQ || *c == DQ ) {
110 qc = *c; c++; state = ST_QUOTE;
111 break;
112 }
113 if ( *c == EOS ) {
114 state = ST_END;
115 break;
116 }
117 if ( *c == BS ) {
118 c++;
119 if ( *c == NL ) {
120 c++;
121 break;
122 }
123 if ( *c == EOS ) {
124 state = ST_END; err = ERR_BS_AT_EOS;
125 break;
126 }
127 }
128 /* All other cases incl. character after BS */
129 save(); c++; state = ST_ARG;
130 break;
131 case ST_QUOTE:
132 while ( *c != qc && ( *c != BS || qc == SQ ) && *c != EOS ) {
133 save(); c++;
134 }
135 if ( *c == qc ) {
136 c++; state = ST_ARG;
137 break;
138 }
139 if ( *c == BS ) {
140 assert (qc == DQ);
141 c++;
142 if ( *c == NL) {
143 c++;
144 break;
145 }
146 if (*c == EOS) {
147 state = ST_END; err = ERR_BS_AT_EOS;
148 break;
149 }
150 if ( ! is_dq_escapable(*c) ) {
151 c--; save(); c++;
152 }
153 save(); c++;
154 break;
155 }
156 if ( *c == EOS ) {
157 state = ST_END; err = ERR_SQ_OPEN_AT_EOS;
158 break;
159 }
160 assert(0);
161 case ST_ARG:
162 if ( *c == SQ || *c == DQ ) {
163 qc = *c; c++; state = ST_QUOTE;
164 break;
165 }
166 if ( is_delim(*c) || *c == EOS ) {
167 push();
168 state = (*c == EOS) ? ST_END : ST_DELIM;
169 c++;
170 break;
171 }
172 if ( *c == BS ) {
173 c++;
174 if ( *c == NL ) {
175 c++;
176 break;
177 }
178 if ( *c == EOS ) {
179 state = ST_END; err = ERR_BS_AT_EOS;
180 break;
181 }
182 }
183 /* All other cases, incl. character after BS */
184 save(); c++;
185 break;
186 default:
187 assert(0);
188 }
189 }
190
191 return ( err != ERR_OK ) ? -1 : flags;
192 }
193
194 /**********************************************************************/
195
#if 0

/*
 * Stand-alone test driver, normally compiled out. Splits its single
 * command-line argument with split_quoted() and prints the resulting
 * arguments, one per line, together with the warning flags.
 */

#include <stdio.h>

int
main (int argc, char *argv[])
{
    char *my_argv[12];
    const int my_argv_sz = (int)(sizeof my_argv / sizeof my_argv[0]);
    int my_argc, i, r;

    if ( argc != 2 ) {
        printf("Usage is: %s: <string to split>\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    printf("String to split is: [%s]\n", argv[1]);
    /* split_quoted() stores starting at my_argv[my_argc] and reads
       the counter before writing it, so it must start at zero */
    my_argc = 0;
    r = split_quoted(argv[1], &my_argc, my_argv, my_argv_sz);
    if ( r < 0 ) {
        printf("Spliting failed!\n");
        /* release whatever was stored before the failure */
        for (i = 0; i < my_argc; i++)
            free(my_argv[i]);
        exit(EXIT_FAILURE);
    }
    printf("Split ok. SPLIT_DROP is %s, SPLIT_TRUNC is %s\n",
           (r & SPLIT_DROP) ? "ON" : "off",
           (r & SPLIT_TRUNC) ? "ON" : "off");

    for (i = 0; i < my_argc; i++) {
        printf("%02d : [%s]\n", i, my_argv[i]);
        /* every argument was allocated with strdup() */
        free(my_argv[i]);
    }

    return EXIT_SUCCESS;
}

#endif
226
227 /**********************************************************************/
228
229 /*
230 * Local Variables:
231 * mode:c
232 * tab-width: 4
233 * c-basic-offset: 4
234 * indent-tabs-mode: nil
235 * End:
236 */
237