1 /*-
2  * Copyright 2021 Vsevolod Stakhov
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "css_selector.hxx"
18 #include "css.hxx"
19 #include "libserver/html/html.hxx"
20 #include "fmt/core.h"
21 #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
22 #include "doctest/doctest.h"
23 
24 namespace rspamd::css {
25 
process_selector_tokens(rspamd_mempool_t * pool,blocks_gen_functor && next_token_functor)26 auto process_selector_tokens(rspamd_mempool_t *pool,
27 							 blocks_gen_functor &&next_token_functor)
28 	-> selectors_vec
29 {
30 	selectors_vec ret;
31 	bool can_continue = true;
32 	enum class selector_process_state {
33 		selector_parse_start = 0,
34 		selector_expect_ident,
35 		selector_ident_consumed,
36 		selector_ignore_attribute,
37 		selector_ignore_function,
38 		selector_ignore_combination
39 	} state = selector_process_state::selector_parse_start;
40 	std::unique_ptr<css_selector> cur_selector;
41 
42 
43 	while (can_continue) {
44 		const auto &next_tok = next_token_functor();
45 
46 		if (next_tok.tag == css_consumed_block::parser_tag_type::css_component) {
47 			const auto &parser_tok = next_tok.get_token_or_empty();
48 
49 			if (state == selector_process_state::selector_parse_start) {
50 				/*
51 				 * At the beginning of the parsing we can expect either
52 				 * delim or an ident, everything else is discarded for now
53 				 */
54 				msg_debug_css("start consume selector");
55 
56 				switch (parser_tok.type) {
57 				case css_parser_token::token_type::delim_token: {
58 					auto delim_c = parser_tok.get_delim();
59 
60 					if (delim_c == '.') {
61 						cur_selector = std::make_unique<css_selector>(
62 								css_selector::selector_type::SELECTOR_CLASS);
63 						state = selector_process_state::selector_expect_ident;
64 					}
65 					else if (delim_c == '#') {
66 						cur_selector = std::make_unique<css_selector>(
67 								css_selector::selector_type::SELECTOR_ID);
68 						state = selector_process_state::selector_expect_ident;
69 					}
70 					else if (delim_c == '*') {
71 						cur_selector = std::make_unique<css_selector>(
72 								css_selector::selector_type::SELECTOR_ALL);
73 						state = selector_process_state::selector_ident_consumed;
74 					}
75 					break;
76 				}
77 				case css_parser_token::token_type::ident_token: {
78 					auto tag_id = html::html_tag_by_name(parser_tok.get_string_or_default(""));
79 
80 					if (tag_id) {
81 						cur_selector = std::make_unique<css_selector>(tag_id.value());
82 					}
83 					state = selector_process_state::selector_ident_consumed;
84 					break;
85 				}
86 				case css_parser_token::token_type::hash_token:
87 					cur_selector = std::make_unique<css_selector>(
88 							css_selector::selector_type::SELECTOR_ID);
89 					cur_selector->value =
90 							parser_tok.get_string_or_default("");
91 					state = selector_process_state::selector_ident_consumed;
92 					break;
93 				default:
94 					msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected start",
95 							next_tok.token_type_str());
96 					can_continue = false;
97 					break;
98 				}
99 			}
100 			else if (state == selector_process_state::selector_expect_ident) {
101 				/*
102 				 * We got something like a selector start, so we expect
103 				 * a plain ident
104 				 */
105 				if (parser_tok.type == css_parser_token::token_type::ident_token && cur_selector) {
106 					cur_selector->value = parser_tok.get_string_or_default("");
107 					state = selector_process_state::selector_ident_consumed;
108 				}
109 				else {
110 					msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected ident",
111 							next_tok.token_type_str());
112 					can_continue = false;
113 				}
114 			}
115 			else if (state == selector_process_state::selector_ident_consumed) {
116 				if (parser_tok.type == css_parser_token::token_type::comma_token && cur_selector) {
117 					/* Got full selector, attach it to the vector and go further */
118 					msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
119 					ret.push_back(std::move(cur_selector));
120 					state = selector_process_state::selector_parse_start;
121 				}
122 				else if (parser_tok.type == css_parser_token::token_type::semicolon_token) {
123 					/* TODO: implement adjustments */
124 					state = selector_process_state::selector_ignore_function;
125 				}
126 				else if (parser_tok.type == css_parser_token::token_type::osqbrace_token) {
127 					/* TODO: implement attributes checks */
128 					state = selector_process_state::selector_ignore_attribute;
129 				}
130 				else {
131 					/* TODO: implement selectors combinations */
132 					state = selector_process_state::selector_ignore_combination;
133 				}
134 			}
135 			else {
136 				/* Ignore state; ignore all till ',' token or eof token */
137 				if (parser_tok.type == css_parser_token::token_type::comma_token && cur_selector) {
138 					/* Got full selector, attach it to the vector and go further */
139 					ret.push_back(std::move(cur_selector));
140 					state = selector_process_state::selector_parse_start;
141 				}
142 				else {
143 					auto debug_str = parser_tok.get_string_or_default("");
144 					msg_debug_css("ignore token %*s", (int)debug_str.size(),
145 							debug_str.data());
146 				}
147 			}
148 		}
149 		else {
150 			/* End of parsing */
151 			if (state == selector_process_state::selector_ident_consumed && cur_selector) {
152 				msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
153 				ret.push_back(std::move(cur_selector));
154 			}
155 			else {
156 				msg_debug_css("not attached selector, state: %d", static_cast<int>(state));
157 			}
158 			can_continue = false;
159 		}
160 
161 	}
162 
163 	return ret; /* copy elision */
164 }
165 
166 auto
debug_str() const167 css_selector::debug_str() const -> std::string
168 {
169 	std::string ret;
170 
171 	if (type == selector_type::SELECTOR_ID) {
172 		ret += "#";
173 	}
174 	else if (type == selector_type::SELECTOR_CLASS) {
175 		ret += ".";
176 	}
177 	else if (type == selector_type::SELECTOR_ALL) {
178 		ret = "*";
179 
180 		return ret;
181 	}
182 
183 	std::visit([&](auto arg) -> void {
184 		using T = std::decay_t<decltype(arg)>;
185 
186 		if constexpr (std::is_same_v<T, tag_id_t>) {
187 			ret += fmt::format("tag: {}", static_cast<int>(arg));
188 		}
189 		else {
190 			ret += arg;
191 		}
192 	}, value);
193 
194 	return ret;
195 }
196 
197 TEST_SUITE("css") {
198 	TEST_CASE("simple css selectors") {
199 		const std::vector<std::pair<const char *, std::vector<css_selector::selector_type>>> cases{
200 				{"em", {css_selector::selector_type::SELECTOR_TAG}},
201 				{"*", {css_selector::selector_type::SELECTOR_ALL}},
202 				{".class", {css_selector::selector_type::SELECTOR_CLASS}},
203 				{"#id", {css_selector::selector_type::SELECTOR_ID}},
204 				{"em,.class,#id", {css_selector::selector_type::SELECTOR_TAG,
205 								   css_selector::selector_type::SELECTOR_CLASS,
206 								   css_selector::selector_type::SELECTOR_ID}},
207 		};
208 
209 		auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
210 			"css", 0);
211 
212 		for (const auto &c : cases) {
213 			auto res = process_selector_tokens(pool,
214 					get_selectors_parser_functor(pool, c.first));
215 
216 			CHECK(c.second.size() == res.size());
217 
218 			for (auto i = 0; i < c.second.size(); i ++) {
219 				CHECK(res[i]->type == c.second[i]);
220 			}
221 		}
222 
223 		rspamd_mempool_delete(pool);
224 	}
225 }
226 
227 }
228 
229