1 /*
2  * Copyright (c) 2015-2016 Red Hat, Inc.
3  *
4  * All rights reserved.
5  *
6  * Author: Jan Friesse (jfriesse@redhat.com)
7  *
8  * This software licensed under BSD license, the text of which follows:
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  * - Redistributions of source code must retain the above copyright notice,
14  *   this list of conditions and the following disclaimer.
15  * - Redistributions in binary form must reproduce the above copyright notice,
16  *   this list of conditions and the following disclaimer in the documentation
17  *   and/or other materials provided with the distribution.
18  * - Neither the name of the Red Hat, Inc. nor the names of its
19  *   contributors may be used to endorse or promote products derived from this
20  *   software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32  * THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include <string.h>
36 
37 #include "dynar-simple-lex.h"
38 
/*
 * Locale independent whitespace test. The lexer is used for protocol parsing,
 * so the set of blank characters is fixed here instead of relying on
 * isspace(), whose result depends on the current locale.
 *
 * Returns 1 when ch is space, form feed, newline, carriage return, horizontal
 * tab or vertical tab, otherwise 0.
 */
static int
dynar_simple_lex_is_space(char ch)
{

	switch (ch) {
	case ' ':
	case '\f':
	case '\n':
	case '\r':
	case '\t':
	case '\v':
		return (1);
	default:
		return (0);
	}
}
47 
48 void
dynar_simple_lex_init(struct dynar_simple_lex * lex,struct dynar * input,enum dynar_simple_lex_type lex_type)49 dynar_simple_lex_init(struct dynar_simple_lex *lex, struct dynar *input,
50     enum dynar_simple_lex_type lex_type)
51 {
52 
53 	memset(lex, 0, sizeof(*lex));
54 	lex->input = input;
55 	lex->lex_type = lex_type;
56 	dynar_init(&lex->token, dynar_max_size(input));
57 }
58 
59 void
dynar_simple_lex_destroy(struct dynar_simple_lex * lex)60 dynar_simple_lex_destroy(struct dynar_simple_lex *lex)
61 {
62 
63 	dynar_destroy(&lex->token);
64 	memset(lex, 0, sizeof(*lex));
65 }
66 
67 struct dynar *
dynar_simple_lex_token_next(struct dynar_simple_lex * lex)68 dynar_simple_lex_token_next(struct dynar_simple_lex *lex)
69 {
70 	size_t pos;
71 	size_t size;
72 	char *str;
73 	char ch, ch2;
74 	int add_char;
75 	int state;
76 
77 	dynar_clean(&lex->token);
78 
79 	size = dynar_size(lex->input);
80 	str = dynar_data(lex->input);
81 
82 	state = 1;
83 	pos = lex->pos;
84 
85 	while (state != 0) {
86 		if (pos < size) {
87 			ch = str[pos];
88 		} else {
89 			ch = '\0';
90 		}
91 
92 		add_char = 0;
93 
94 		switch (state) {
95 		case 1:
96 			/*
97 			 * Skip spaces. Newline is special and means end of processing
98 			 */
99 			if (pos >= size || ch == '\n' || ch == '\r') {
100 				state = 0;
101 			} else if (dynar_simple_lex_is_space(ch)) {
102 				pos++;
103 			} else {
104 				state = 2;
105 			}
106 			break;
107 		case 2:
108 			/*
109 			 * Read word
110 			 */
111 			if (pos >= size) {
112 				state = 0;
113 			} else if ((lex->lex_type == DYNAR_SIMPLE_LEX_TYPE_BACKSLASH ||
114 			    lex->lex_type == DYNAR_SIMPLE_LEX_TYPE_QUOTE) && ch == '\\') {
115 				pos++;
116 				state = 3;
117 			} else if (lex->lex_type == DYNAR_SIMPLE_LEX_TYPE_QUOTE &&
118 			    ch == '"') {
119 				pos++;
120 				state = 4;
121 			} else if (dynar_simple_lex_is_space(ch)) {
122 				state = 0;
123 			} else {
124 				pos++;
125 				add_char = 1;
126 			}
127 			break;
128 		case 3:
129 			/*
130 			 * Process backslash
131 			 */
132 			if (pos >= size || ch == '\n' || ch == '\r') {
133 				/*
134 				 * End of string. Do not include backslash (it's just ignored)
135 				 */
136 				state = 0;
137 			} else {
138 				add_char = 1;
139 				state = 2;
140 				pos++;
141 			}
142 			break;
143 		case 4:
144 			/*
145 			 * Quote word
146 			 */
147 			if (pos >= size) {
148 				state = 0;
149 			} else if (ch == '\\') {
150 				state = 5;
151 				pos++;
152 			} else if (ch == '"') {
153 				state = 2;
154 				pos++;
155 			} else if (ch == '\n' || ch == '\r') {
156 				state = 0;
157 			} else {
158 				pos++;
159 				add_char = 1;
160 			}
161 			break;
162 		case 5:
163 			/*
164 			 * Quote word backslash
165 			 */
166 			if (pos >= size || ch == '\n' || ch == '\r') {
167 				/*
168 				 * End of string. Do not include backslash (it's just ignored)
169 				 */
170 				state = 0;
171 			} else if (ch == '\\' || ch == '"') {
172 				add_char = 1;
173 				state = 4;
174 				pos++;
175 			} else {
176 				ch2 = '\\';
177 				if (dynar_cat(&lex->token, &ch2, sizeof(ch2)) != 0) {
178 					return (NULL);
179 				}
180 
181 				add_char = 1;
182 				state = 4;
183 				pos++;
184 			}
185 			break;
186 		}
187 
188 		if (add_char) {
189 			if (dynar_cat(&lex->token, &ch, sizeof(ch)) != 0) {
190 				return (NULL);
191 			}
192 		}
193 	}
194 
195 	ch = '\0';
196 	if (dynar_cat(&lex->token, &ch, sizeof(ch)) != 0) {
197 		return (NULL);
198 	}
199 
200 	lex->pos = pos;
201 
202 	return (&lex->token);
203 }
204