1 /* vi: set sw=4 ts=4:
2  *
3  * split.c
4  *
 * Function that splits a string into arguments with quoting.
6  *
7  * by Nick Patavalis (npat@efault.net)
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful, but
15  * WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
22  * USA
23  */
24 
25 #include <stdlib.h>
26 #include <string.h>
27 #include <assert.h>
28 
29 #include "split.h"
30 
/*
 * End-codes of the lexer: ERR_OK when the string was lexed without
 * error, otherwise the kind of malformed ending that was found.
 */
enum err_codes {
    /* no error, string lexed ok */
    ERR_OK = 0,
    /* backslash at the end of string */
    ERR_BS_AT_EOS = 1,
    /* single-quote left open */
    ERR_SQ_OPEN_AT_EOS = 2,
    /* double-quote left open */
    ERR_DQ_OPEN_AT_EOS = 3
};
38 
/* States of the lexer state-machine driven by split_quoted() */
enum states {
    /* between arguments, skipping delimiters */
    ST_DELIM = 0,
    /* inside a quoted section of an argument */
    ST_QUOTE = 1,
    /* inside an unquoted section of an argument */
    ST_ARG = 2,
    /* lexing is over; the main loop terminates */
    ST_END = 3
};
46 
/* Characters that get special treatment from the lexer */
#define BS  '\\'    /* backslash */
#define SQ  '\''    /* single quote */
#define DQ  '"'     /* double quote */
#define NL  '\n'    /* newline */
#define EOS '\0'    /* end of string (NUL terminator) */
53 
/*
 * Non-zero (1) if "c" is a space, a tab or a newline, else 0.
 * The argument is evaluated more than once: keep it free of
 * side effects.
 */
#define is_delim(c) \
    ( ! ( (c) != ' ' && (c) != '\t' && (c) != '\n' ) )
56 
/*
 * Non-zero (1) if "c" is a backslash, a double quote, a backquote or
 * a dollar sign, else 0. split_quoted() uses it to decide whether a
 * backslash found inside double quotes escapes the next character.
 * The argument is evaluated more than once: keep it free of side
 * effects.
 */
#define is_dq_escapable(c) \
    ( ! ( (c) != '$' && (c) != '`' && (c) != '"' && (c) != '\\' ) )
59 
/*
 * Short-hands used in split_quoted(). They are not self-contained:
 * they expand to code that works directly on that function's
 * arguments and local variables.
 *
 * push(): ends the argument collected in "arg_buff". If "argv" still
 * has a free slot (*argc < argv_sz) the buffer is NUL-terminated,
 * duplicated with strdup(3), stored at argv[*argc] and *argc is
 * incremented; if the duplication fails the enclosing function
 * returns -1. If there is no free slot the argument is discarded and
 * SPLIT_DROP is raised in "flags". Either way "ap" is rewound to the
 * start of "arg_buff".
 */
#define push()                                          \
    do {                                                \
        if ( *argc >= argv_sz ) {                       \
            flags |= SPLIT_DROP;                        \
        } else {                                        \
            char *copy;                                 \
            *ap = '\0';                                 \
            copy = strdup(arg_buff);                    \
            /* !! out of mem !! */                      \
            if ( copy == NULL ) return -1;              \
            argv[(*argc)++] = copy;                     \
        }                                               \
        ap = arg_buff;                                  \
    } while(0)
76 
/*
 * save(): short-hand used in split_quoted(); works directly on that
 * function's locals. Appends the current input character (*c) to the
 * argument buffer and advances "ap". Once "ap" has reached the
 * end-guard "ae" the character is discarded instead and SPLIT_TRUNC
 * is raised in "flags". The input pointer "c" is never moved here.
 */
#define save()                                  \
    do {                                        \
        if ( ap == ae ) {                       \
            flags |= SPLIT_TRUNC;               \
        } else {                                \
            *ap = *c;                           \
            ap++;                               \
        }                                       \
    } while (0)
85 
86 int
split_quoted(const char * s,int * argc,char * argv[],int argv_sz)87 split_quoted (const char *s, int *argc, char *argv[], int argv_sz)
88 {
89     char arg_buff[MAX_ARG_LEN]; /* current argument buffer */
90     char *ap, *ae;              /* arg_buff current ptr & end-guard */
91     const char *c;              /* current input charcter ptr */
92     char qc;                    /* current quote character */
93     enum states state;          /* current state */
94     enum err_codes err;         /* error end-code */
95     int flags;                  /* warning flags */
96 
97     ap = &arg_buff[0];
98     ae = &arg_buff[MAX_ARG_LEN - 1];
99     c = &s[0];
100     state = ST_DELIM;
101     err = ERR_OK;
102     flags = 0;
103     qc = SQ; /* silence compiler waring */
104 
105     while ( state != ST_END ) {
106         switch (state) {
107         case ST_DELIM:
108             while ( is_delim(*c) ) c++;
109             if ( *c == SQ || *c == DQ ) {
110                 qc = *c; c++; state = ST_QUOTE;
111                 break;
112             }
113             if ( *c == EOS ) {
114                 state = ST_END;
115                 break;
116             }
117             if ( *c == BS ) {
118                 c++;
119                 if ( *c == NL ) {
120                     c++;
121                     break;
122                 }
123                 if ( *c == EOS ) {
124                     state = ST_END; err = ERR_BS_AT_EOS;
125                     break;
126                 }
127             }
128             /* All other cases incl. character after BS */
129             save(); c++; state = ST_ARG;
130             break;
131         case ST_QUOTE:
132             while ( *c != qc && ( *c != BS || qc == SQ ) && *c != EOS ) {
133                 save(); c++;
134             }
135             if ( *c == qc ) {
136                 c++; state = ST_ARG;
137                 break;
138             }
139             if ( *c == BS ) {
140                 assert (qc == DQ);
141                 c++;
142                 if ( *c == NL) {
143                     c++;
144                     break;
145                 }
146                 if (*c == EOS) {
147                     state = ST_END; err = ERR_BS_AT_EOS;
148                     break;
149                 }
150                 if ( ! is_dq_escapable(*c) ) {
151                     c--; save(); c++;
152                 }
153                 save(); c++;
154                 break;
155             }
156             if ( *c == EOS ) {
157                 state = ST_END; err = ERR_SQ_OPEN_AT_EOS;
158                 break;
159             }
160             assert(0);
161         case ST_ARG:
162             if ( *c == SQ || *c == DQ ) {
163                 qc = *c; c++; state = ST_QUOTE;
164                 break;
165             }
166             if ( is_delim(*c) || *c == EOS ) {
167                 push();
168                 state = (*c == EOS) ? ST_END : ST_DELIM;
169                 c++;
170                 break;
171             }
172             if ( *c == BS ) {
173                 c++;
174                 if ( *c == NL ) {
175                     c++;
176                     break;
177                 }
178                 if ( *c == EOS ) {
179                     state = ST_END; err = ERR_BS_AT_EOS;
180                     break;
181                 }
182             }
183             /* All other cases, incl. character after BS */
184             save(); c++;
185             break;
186         default:
187             assert(0);
188         }
189     }
190 
191     return ( err != ERR_OK ) ? -1 : flags;
192 }
193 
194 /**********************************************************************/
195 
#if 0

/*
 * Stand-alone test driver, compiled out by default. Change the
 * "#if 0" above to "#if 1" to build it. It splits its single
 * command-line argument with split_quoted() and prints the warning
 * flags and the resulting arguments, one per line.
 */

#include <stdio.h>

int
main (int argc, char *argv[])
{
    char *my_argv[12];
    int my_argc, i, r;

    if ( argc != 2 ) {
        printf("Usage is: %s: <string to split>\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    printf("String to split is: [%s]\n", argv[1]);
    /* split_quoted() stores from my_argv[my_argc] on and reads
       my_argc before writing it, so it must start from zero */
    my_argc = 0;
    r = split_quoted(argv[1], &my_argc, my_argv,
                     (int)(sizeof my_argv / sizeof my_argv[0]));
    if ( r < 0 ) {
        printf("Spliting failed!\n");
        exit(EXIT_FAILURE);
    }
    printf("Split ok. SPLIT_DROP is %s, SPLIT_TRUNC is %s\n",
           (r & SPLIT_DROP) ? "ON" : "off",
           (r & SPLIT_TRUNC) ? "ON" : "off");

    for (i = 0; i < my_argc; i++) {
        printf("%02d : [%s]\n", i, my_argv[i]);
        /* every argument is a strdup(3) copy owned by us */
        free(my_argv[i]);
    }

    return EXIT_SUCCESS;
}

#endif
226 
227 /**********************************************************************/
228 
229 /*
230  * Local Variables:
231  * mode:c
232  * tab-width: 4
233  * c-basic-offset: 4
234  * indent-tabs-mode: nil
235  * End:
236  */
237