1 /*-
2 * Copyright 2021 Vsevolod Stakhov
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "css_selector.hxx"
18 #include "css.hxx"
19 #include "libserver/html/html.hxx"
20 #include "fmt/core.h"
21 #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
22 #include "doctest/doctest.h"
23
24 namespace rspamd::css {
25
process_selector_tokens(rspamd_mempool_t * pool,blocks_gen_functor && next_token_functor)26 auto process_selector_tokens(rspamd_mempool_t *pool,
27 blocks_gen_functor &&next_token_functor)
28 -> selectors_vec
29 {
30 selectors_vec ret;
31 bool can_continue = true;
32 enum class selector_process_state {
33 selector_parse_start = 0,
34 selector_expect_ident,
35 selector_ident_consumed,
36 selector_ignore_attribute,
37 selector_ignore_function,
38 selector_ignore_combination
39 } state = selector_process_state::selector_parse_start;
40 std::unique_ptr<css_selector> cur_selector;
41
42
43 while (can_continue) {
44 const auto &next_tok = next_token_functor();
45
46 if (next_tok.tag == css_consumed_block::parser_tag_type::css_component) {
47 const auto &parser_tok = next_tok.get_token_or_empty();
48
49 if (state == selector_process_state::selector_parse_start) {
50 /*
51 * At the beginning of the parsing we can expect either
52 * delim or an ident, everything else is discarded for now
53 */
54 msg_debug_css("start consume selector");
55
56 switch (parser_tok.type) {
57 case css_parser_token::token_type::delim_token: {
58 auto delim_c = parser_tok.get_delim();
59
60 if (delim_c == '.') {
61 cur_selector = std::make_unique<css_selector>(
62 css_selector::selector_type::SELECTOR_CLASS);
63 state = selector_process_state::selector_expect_ident;
64 }
65 else if (delim_c == '#') {
66 cur_selector = std::make_unique<css_selector>(
67 css_selector::selector_type::SELECTOR_ID);
68 state = selector_process_state::selector_expect_ident;
69 }
70 else if (delim_c == '*') {
71 cur_selector = std::make_unique<css_selector>(
72 css_selector::selector_type::SELECTOR_ALL);
73 state = selector_process_state::selector_ident_consumed;
74 }
75 break;
76 }
77 case css_parser_token::token_type::ident_token: {
78 auto tag_id = html::html_tag_by_name(parser_tok.get_string_or_default(""));
79
80 if (tag_id) {
81 cur_selector = std::make_unique<css_selector>(tag_id.value());
82 }
83 state = selector_process_state::selector_ident_consumed;
84 break;
85 }
86 case css_parser_token::token_type::hash_token:
87 cur_selector = std::make_unique<css_selector>(
88 css_selector::selector_type::SELECTOR_ID);
89 cur_selector->value =
90 parser_tok.get_string_or_default("");
91 state = selector_process_state::selector_ident_consumed;
92 break;
93 default:
94 msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected start",
95 next_tok.token_type_str());
96 can_continue = false;
97 break;
98 }
99 }
100 else if (state == selector_process_state::selector_expect_ident) {
101 /*
102 * We got something like a selector start, so we expect
103 * a plain ident
104 */
105 if (parser_tok.type == css_parser_token::token_type::ident_token && cur_selector) {
106 cur_selector->value = parser_tok.get_string_or_default("");
107 state = selector_process_state::selector_ident_consumed;
108 }
109 else {
110 msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected ident",
111 next_tok.token_type_str());
112 can_continue = false;
113 }
114 }
115 else if (state == selector_process_state::selector_ident_consumed) {
116 if (parser_tok.type == css_parser_token::token_type::comma_token && cur_selector) {
117 /* Got full selector, attach it to the vector and go further */
118 msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
119 ret.push_back(std::move(cur_selector));
120 state = selector_process_state::selector_parse_start;
121 }
122 else if (parser_tok.type == css_parser_token::token_type::semicolon_token) {
123 /* TODO: implement adjustments */
124 state = selector_process_state::selector_ignore_function;
125 }
126 else if (parser_tok.type == css_parser_token::token_type::osqbrace_token) {
127 /* TODO: implement attributes checks */
128 state = selector_process_state::selector_ignore_attribute;
129 }
130 else {
131 /* TODO: implement selectors combinations */
132 state = selector_process_state::selector_ignore_combination;
133 }
134 }
135 else {
136 /* Ignore state; ignore all till ',' token or eof token */
137 if (parser_tok.type == css_parser_token::token_type::comma_token && cur_selector) {
138 /* Got full selector, attach it to the vector and go further */
139 ret.push_back(std::move(cur_selector));
140 state = selector_process_state::selector_parse_start;
141 }
142 else {
143 auto debug_str = parser_tok.get_string_or_default("");
144 msg_debug_css("ignore token %*s", (int)debug_str.size(),
145 debug_str.data());
146 }
147 }
148 }
149 else {
150 /* End of parsing */
151 if (state == selector_process_state::selector_ident_consumed && cur_selector) {
152 msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
153 ret.push_back(std::move(cur_selector));
154 }
155 else {
156 msg_debug_css("not attached selector, state: %d", static_cast<int>(state));
157 }
158 can_continue = false;
159 }
160
161 }
162
163 return ret; /* copy elision */
164 }
165
166 auto
debug_str() const167 css_selector::debug_str() const -> std::string
168 {
169 std::string ret;
170
171 if (type == selector_type::SELECTOR_ID) {
172 ret += "#";
173 }
174 else if (type == selector_type::SELECTOR_CLASS) {
175 ret += ".";
176 }
177 else if (type == selector_type::SELECTOR_ALL) {
178 ret = "*";
179
180 return ret;
181 }
182
183 std::visit([&](auto arg) -> void {
184 using T = std::decay_t<decltype(arg)>;
185
186 if constexpr (std::is_same_v<T, tag_id_t>) {
187 ret += fmt::format("tag: {}", static_cast<int>(arg));
188 }
189 else {
190 ret += arg;
191 }
192 }, value);
193
194 return ret;
195 }
196
197 TEST_SUITE("css") {
198 TEST_CASE("simple css selectors") {
199 const std::vector<std::pair<const char *, std::vector<css_selector::selector_type>>> cases{
200 {"em", {css_selector::selector_type::SELECTOR_TAG}},
201 {"*", {css_selector::selector_type::SELECTOR_ALL}},
202 {".class", {css_selector::selector_type::SELECTOR_CLASS}},
203 {"#id", {css_selector::selector_type::SELECTOR_ID}},
204 {"em,.class,#id", {css_selector::selector_type::SELECTOR_TAG,
205 css_selector::selector_type::SELECTOR_CLASS,
206 css_selector::selector_type::SELECTOR_ID}},
207 };
208
209 auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
210 "css", 0);
211
212 for (const auto &c : cases) {
213 auto res = process_selector_tokens(pool,
214 get_selectors_parser_functor(pool, c.first));
215
216 CHECK(c.second.size() == res.size());
217
218 for (auto i = 0; i < c.second.size(); i ++) {
219 CHECK(res[i]->type == c.second[i]);
220 }
221 }
222
223 rspamd_mempool_delete(pool);
224 }
225 }
226
227 }
228
229