1 /*
2  * fast_css_transform.cpp
3  * Copyright (C) 2021 Kovid Goyal <kovid at kovidgoyal.net>
4  *
5  * Distributed under terms of the GPL3 license.
6  */
7 
8 // See https://www.w3.org/TR/css-syntax-3
9 
10 #define PY_SSIZE_T_CLEAN
11 #define UNICODE
12 #define _UNICODE
#include <Python.h>
#include <stdlib.h>
#include <bitset>
#include <cmath>
#include <codecvt>
#include <exception>
#include <functional>
#include <iostream>
#include <locale>
#include <stack>
#include <stdexcept>
#include <string>
#include <vector>
25 #include <frozen/unordered_map.h>
26 #include <frozen/string.h>
27 #include "../utils/cpp_binding.h"
28 #define STB_SPRINTF_IMPLEMENTATION
29 #include "../utils/stb_sprintf.h"
30 
31 // character classes {{{
// True only for space, newline and tab. (InputStream::peek_one already folds
// form feed and carriage return into '\n' before characters reach this test.)
static inline bool
is_whitespace(char32_t ch) {
    switch (ch) {
        case ' ':
        case '\n':
        case '\t':
            return true;
        default:
            return false;
    }
}
36 
// True when ch lies in the surrogate range U+D800..U+DFFF.
static inline bool
is_surrogate(char32_t ch) {
    // Exactly the values in that range have 0xD800 as their bits above the low 11.
    return (ch & 0xfffff800u) == 0xd800u;
}
41 
// True for the ASCII hexadecimal digits 0-9, a-f and A-F.
static inline bool
is_hex_digit(char32_t ch) {
    if ('0' <= ch && ch <= '9') return true;
    if ('a' <= ch && ch <= 'f') return true;
    return 'A' <= ch && ch <= 'F';
}
46 
// True for the ASCII letters a-z and A-Z.
static inline bool
is_letter(char32_t ch) {
    const bool is_lower = ch >= 'a' && ch <= 'z';
    const bool is_upper = ch >= 'A' && ch <= 'Z';
    return is_lower || is_upper;
}
51 
// True for the ASCII decimal digits 0-9.
static inline bool
is_digit(char32_t ch) {
    return !(ch < '0' || ch > '9');
}
56 
// True when ch may begin a name: an ASCII letter, an underscore, or any code
// point at or above U+0080.
static inline bool
is_name_start(char32_t ch) {
    if (ch >= 0x80 || ch == '_') return true;
    return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');
}
61 
// True when ch may appear inside a name: anything accepted as a name start
// (ASCII letter, underscore, code point >= U+0080), an ASCII digit, or a hyphen.
static inline bool
is_name_body(char32_t ch) {
    if (ch >= 0x80) return true;
    if (ch == '_' || ch == '-') return true;
    if ('0' <= ch && ch <= '9') return true;
    return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');
}
66 
67 static inline bool
is_name(char32_t ch)68 is_name(char32_t ch) {
69     return is_name_start(ch) || is_digit(ch) || ch == '-';
70 }
71 
// True for the printable ASCII characters, i.e. ' ' (0x20) through '~' (0x7e).
static inline bool
is_printable_ascii(char32_t ch) {
    return !(ch < ' ') && !(ch > '~');
}
76 
77 // }}}
78 
// Exception type thrown in this file when a call into the Python C-API fails
// (for example when a token's text cannot be converted to a unicode object).
class python_error : public std::runtime_error {
    public:
        python_error(const char *msg)
            : std::runtime_error(msg)
        {}
};
83 
84 // Parse numbers {{{
85 
// Integer type used for every integral value handled by the number parser.
using integer_type = long long;

// Result of parsing a number: holds either an integer or a floating point
// value, with is_integer telling which of the two fields is meaningful.
class ParsedNumber {
    public:
        bool is_integer = false;
        integer_type integer_value = 0;  // the value when is_integer is true, 0 otherwise
        double float_value = 0;          // the value when is_integer is false, 0 otherwise

        // Construct an integer result.
        ParsedNumber(integer_type val) {
            is_integer = true;
            integer_value = val;
        }

        // Construct a floating point result.
        ParsedNumber(double val) {
            float_value = val;
        }

        // The stored value as a double, whichever representation is active.
        double as_double() const {
            if (is_integer) return static_cast<double>(integer_value);
            return float_value;
        }
};
97 
// Reference values used to express absolute lengths in rem.
static const double base_font_size = 16.0;                  // divisor that turns a px value into rem
static const double dpi = 96.0;
static const double pt_to_px = dpi / 72.0;
static const double pt_to_rem = pt_to_px / base_font_size;

// Convert a length to rem.
//   val    - the numeric value of the length
//   factor - multiplier that turns val into points; the special value 0.0
//            (used for "px" in absolute_length_units) means val is already in
//            pixels and is only divided by base_font_size
static double
convert_font_size(double val, double factor) {
    if (factor == 0.0) return val / base_font_size;
    return val * factor * pt_to_rem;
}
104 
105 static integer_type
ipow(integer_type base,integer_type exp)106 ipow(integer_type base, integer_type exp) {
107     integer_type result = 1;
108     while(true) {
109         if (exp & 1) result *= base;
110         exp >>= 1;
111         if (!exp) break;
112         base *= base;
113     }
114     return result;
115 }
116 
117 template <typename T>
118 static integer_type
parse_integer(const T & src,const size_t first,size_t last)119 parse_integer(const T &src, const size_t first, size_t last) {
120 	integer_type ans = 0, base = 1;
121     while(true) {
122 		integer_type digit = src[last] - '0';
123 		ans += digit * base;
124         if (last == first) break;
125         last--;
126         base *= 10;
127     }
128     return ans;
129 }
130 
131 template <typename T>
132 static ParsedNumber
parse_css_number(const T & src,size_t limit=0)133 parse_css_number(const T &src, size_t limit = 0) {
134 	int sign = 1, exponent_sign = 1;
135 	integer_type integer_part = 0, fractional_part = 0, exponent_part = 0;
136 	size_t num_of_fractional_digits = 0;
137 	size_t first_digit = 0, last_digit = 0;
138 	const size_t src_sz = limit ? limit : src.size();
139 	size_t pos = 0;
140 #define read_sign(which) { if (pos < src_sz && (src[pos] == '+' || src[pos] == '-')) { if (src[pos++] == '-') which = -1; }}
141 #define read_integer(which) { \
142 	if (pos < src_sz && is_digit(src[pos])) { \
143 		first_digit = pos; \
144 		while (pos + 1 < src_sz && is_digit(src[pos+1])) pos++; \
145 		last_digit = pos++; \
146 		which = parse_integer<T>(src, first_digit, last_digit); \
147 	}}
148 	read_sign(sign);
149 	read_integer(integer_part);
150 	if (pos < src_sz && src[pos] == '.') {
151 		pos++;
152 		read_integer(fractional_part);
153 		if (fractional_part) num_of_fractional_digits = last_digit - first_digit + 1;
154 	}
155 	if (pos < src_sz && (src[pos] == 'e' || src[pos] == 'E')) {
156         pos++;
157 		read_sign(exponent_sign);
158 		read_integer(exponent_part);
159 	}
160 	if (fractional_part || (exponent_part && exponent_sign == -1)) {
161 		double ans = (double)integer_part;
162 		if (fractional_part) ans += ((double) fractional_part) / ((double)(ipow(10, num_of_fractional_digits)));
163 		if (exponent_part) {
164 			if (exponent_sign == -1) ans /= (double)ipow(10, exponent_part);
165 			else ans *= ipow(10, exponent_part);
166 		}
167 		return ParsedNumber(sign * ans);
168 	}
169 	return ParsedNumber(sign * integer_part * ipow(10, exponent_part));
170 #undef read_sign
171 #undef read_integer
172 }
173 // }}}
174 
// Kinds of CSS property that receive special handling; see known_properties
// for the property names mapped to each kind.
enum class PropertyType : unsigned int {
    font_size,
    page_break,
    non_standard_writing_mode
};
178 
179 constexpr static const auto known_properties = frozen::make_unordered_map<frozen::string, PropertyType>({
180 		{"font-size", PropertyType::font_size},
181 		{"font", PropertyType::font_size},
182 
183 		{"page-break-before", PropertyType::page_break},
184 		{"page-break-after", PropertyType::page_break},
185 		{"page-break-inside", PropertyType::page_break},
186 
187 		{"-webkit-writing-mode", PropertyType::non_standard_writing_mode},
188 		{"-epub-writing-mode", PropertyType::non_standard_writing_mode},
189 });
190 
191 constexpr static const auto font_size_keywords = frozen::make_unordered_map<frozen::string, frozen::string>({
192 		{"xx-small", "0.5rem"},
193 		{"x-small", "0.625rem"},
194 		{"small", "0.8rem"},
195 		{"medium", "1rem"},
196 		{"large", "1.125rem"},
197 		{"x-large", "1.5rem"},
198 		{"xx-large", "2rem"},
199 		{"xxx-large", "2.55rem"}
200 });
201 
202 constexpr static const auto absolute_length_units = frozen::make_unordered_map<frozen::string, double>({
203 	{"mm", 2.8346456693},
204 	{"cm", 28.346456693},
205 	{"in", 72},
206 	{"pc", 12},
207 	{"q", 0.708661417325},
208 	{"px", 0.0},
209 	{"pt", 1.0}
210 });
211 
212 
// The kinds of token produced by the parser. The comments describe how
// Token::serialize() writes each kind.
enum class TokenType : unsigned int {
    whitespace,      // text written verbatim
    delimiter,       // text written verbatim
    ident,           // text written as an identifier
    at_keyword,      // '@' followed by an identifier
    hash,            // '#' followed by name characters
    string,          // text written as a quoted string
    url,             // url( quoted string )
    function_start,  // identifier followed by '('
    number,          // text written verbatim
    dimension,       // text written verbatim (a number with a unit)
    cdo,             // <!--
    cdc              // -->
};
227 
228 
229 class Token {
230     enum class NameSerializeState : unsigned { start, one_hyphen, body };
231 
232     private:
233         TokenType type;
234         std::u32string text;
235         size_t unit_at, out_pos;
236 
clear()237         void clear() {
238             type = TokenType::whitespace;
239             text.clear();
240             unit_at = 0;
241         }
242 
serialize_escaped_char(const char32_t ch,std::u32string & out) const243         void serialize_escaped_char(const char32_t ch, std::u32string &out) const {
244             out.push_back('\\');
245             if (is_whitespace(ch) || is_hex_digit(ch)) {
246                 char buf[8];
247                 int num = stbsp_snprintf(buf, sizeof(buf), "%x ", (unsigned int)ch);
248                 if (num > 0) {
249                     out.resize(out.size() + num);
250                     for (int i = 0; i < num; i++) out[i + out.size() - num] = buf[i];
251                 } else throw std::logic_error("Failed to convert character to hexadecimal escape");
252             } else out.push_back(ch);
253         }
254 
serialize_ident(std::u32string & out) const255         void serialize_ident(std::u32string &out) const {
256             NameSerializeState state = NameSerializeState::start;
257             for (const auto ch : text) {
258                 switch(state) {
259                     case NameSerializeState::start:
260                         if (is_name_start(ch)) { out.push_back(ch); state = NameSerializeState::body; }
261                         else if (ch == '-') { out.push_back(ch); state = NameSerializeState::one_hyphen; }
262                         else throw std::logic_error("Unable to serialize ident because of invalid start character");
263                         break;
264                     case NameSerializeState::one_hyphen:
265                         if (is_name_start(ch) || ch == '-') { out.push_back(ch); state = NameSerializeState::body; }
266                         else serialize_escaped_char(ch, out);
267                         break;
268                     case NameSerializeState::body:
269                         if (is_name_body(ch)) out.push_back(ch);
270                         else serialize_escaped_char(ch, out);
271                         break;
272                 }
273             }
274         }
275 
serialize_hash(std::u32string & out) const276         void serialize_hash(std::u32string &out) const {
277             for (const auto ch : text) {
278                 if (is_name_body(ch)) out.push_back(ch);
279                 else serialize_escaped_char(ch, out);
280             }
281         }
282 
serialize_string(std::u32string & out) const283         void serialize_string(std::u32string &out) const {
284             const char32_t delim = text.find('"') == std::u32string::npos ? '"' : '\'';
285             out.push_back(delim);
286             for (const auto ch : text) {
287                 if (ch == '\n') out.append({'\\', '\n'});
288                 else if (ch == delim || ch == '\\') serialize_escaped_char(ch, out);
289                 else out.push_back(ch);
290             }
291             out.push_back(delim);
292         }
293 
294     public:
Token()295         Token() :
296 			type(TokenType::whitespace), text(), unit_at(0), out_pos(0) {
297 				text.reserve(16);
298 			}
299 
Token(const TokenType type,const char32_t ch,size_t out_pos=0)300         Token(const TokenType type, const char32_t ch, size_t out_pos = 0) :
301 			type(type), text(), unit_at(0), out_pos(out_pos) {
302 				text.reserve(16);
303 				if (ch) text.push_back(ch);
304 			}
305 
Token(const Token & other)306         Token(const Token& other) :
307 			type(other.type), text(other.text), unit_at(other.unit_at), out_pos(other.out_pos) {} // copy constructor
308 
Token(Token && other)309         Token(Token&& other) :
310 			type(other.type), text(std::move(other.text)), unit_at(other.unit_at), out_pos(other.out_pos) {} // move constructor
311 
operator =(const Token & other)312         Token& operator=(const Token& other) { // copy assignment
313 			type = other.type; text = other.text; unit_at = other.unit_at; out_pos = other.out_pos; return *this;
314 		}
315 
operator =(Token && other)316         Token& operator=(Token&& other) { // move assignment
317 			type = other.type; text = std::move(other.text); unit_at = other.unit_at; out_pos = other.out_pos; return *this;
318 		}
319 
reset()320 		void reset() {
321 			text.clear(); unit_at = 0; out_pos = 0; type = TokenType::whitespace;
322 		}
323 
get_type() const324 		TokenType get_type() const { return type; }
set_type(const TokenType q)325         void set_type(const TokenType q) { type = q; }
get_output_position() const326         size_t get_output_position() const { return out_pos; }
set_output_position(const size_t val)327 		void set_output_position(const size_t val) { out_pos = val; }
is_type(const TokenType q) const328         bool is_type(const TokenType q) const { return type == q; }
is_delimiter(const char32_t ch) const329         bool is_delimiter(const char32_t ch) const { return type == TokenType::delimiter && text.size() == 1 && text[0] == ch; }
add_char(const char32_t ch)330         void add_char(const char32_t ch) { text.push_back(ch); }
mark_unit()331         void mark_unit() { unit_at = text.size(); }
clear_text()332         void clear_text() { text.clear(); }
333 
text_equals_case_insensitive(const char * lowercase_text) const334         bool text_equals_case_insensitive(const char *lowercase_text) const {
335             const char32_t* str = text.c_str();
336             const unsigned char* q = reinterpret_cast<const unsigned char*>(lowercase_text);
337             static const char delta = 'a' - 'A';
338             for (unsigned i = 0; ; i++) {
339                 if (!str[i]) return q[i] ? false : true;
340                 if (!q[i]) return false;
341                 if (str[i] != q[i] && str[i] + delta != q[i]) return false;
342             }
343             return true;
344         }
345 
text_as_ascii_lowercase(std::string & scratch)346 		bool text_as_ascii_lowercase(std::string &scratch) {
347 			scratch.resize(text.size());
348             size_t i = 0;
349 			for (auto ch : text) {
350 				if (is_printable_ascii(ch)) {
351 					if ('A' <= ch && ch <= 'Z') ch += 'a' - 'A';
352 					scratch[i++] = (char)ch;
353 				} else return false;
354 			}
355             scratch.resize(i);
356 			return true;
357 		}
358 
is_keyword_case_insensitive(const char * lowercase_text) const359 		bool is_keyword_case_insensitive(const char *lowercase_text) const {
360 			return type == TokenType::ident && text_equals_case_insensitive(lowercase_text);
361 		}
362 
trim_trailing_whitespace()363         void trim_trailing_whitespace() {
364             while(text.size() && is_whitespace(text.back())) text.pop_back();
365         }
366 
is_significant() const367 		bool is_significant() const {
368 			switch(type) {
369 				case TokenType::whitespace:
370 				case TokenType::cdo:
371 				case TokenType::cdc:
372 					return false;
373 				default:
374 					return true;
375 			}
376 		}
377 
is_property_terminator() const378 		bool is_property_terminator() const {
379 			switch(type) {
380 				case TokenType::whitespace:
381 					return text.size() > 0 && text.find_first_of('\n') != std::string::npos;
382 				case TokenType::delimiter:
383 					return text.size() == 1 && (text[0] == ';' || text[0] == '}');
384 				default:
385 					return false;
386 			}
387 		}
388 
get_text_as_python() const389 		PyObject* get_text_as_python() const {
390 			PyObject *ans = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, text.data(), text.size());
391 			if (ans == NULL) throw python_error("Failed to convert token value to python unicode object");
392 			return ans;
393 		}
394 
get_text() const395         const std::u32string& get_text() const {
396             return text;
397         }
398 
type_name() const399         const char* type_name() const {
400 #define n(x) case TokenType::x: return #x;
401             switch(type) {
402                 n(whitespace); n(cdo); n(cdc); n(ident); n(string); n(number);
403                 n(function_start); n(dimension); n(url); n(delimiter); n(at_keyword); n(hash);
404             }
405 #undef n
406         }
407 
erase_text_substring(size_t pos,size_t len)408 		void erase_text_substring(size_t pos, size_t len) {
409 			text.replace(pos, len, (size_t)0u, 0);
410 		}
411 
prepend(const char32_t * src)412 		void prepend(const char32_t *src) {
413 			text.insert(0, src);
414 		}
415 
set_text(const PyObject * src)416 		void set_text(const PyObject* src) {
417 			if (PyUnicode_READY(src) != 0) throw python_error("Failed to set token value from unicode object as readying the unicode object failed");
418 			int kind = PyUnicode_KIND(src); void *data = PyUnicode_DATA(src);
419             text.resize(PyUnicode_GET_LENGTH(src));
420 			for (Py_ssize_t i = 0; i < PyUnicode_GET_LENGTH(src); i++) text[i] = PyUnicode_READ(kind, data, i);
421 		}
422 
set_text(const char32_t * src)423 		void set_text(const char32_t *src) {
424             text = src;
425 		}
426 
set_text(const frozen::string & src)427 		void set_text(const frozen::string &src) {
428             text.resize(src.size());
429             for (size_t i = 0; i < text.size(); i++) text[i] = src[i];
430 		}
431 
set_text(const std::string & src)432         void set_text(const std::string &src) {
433             text.resize(src.size());
434             for (size_t i = 0; i < text.size(); i++) text[i] = src[i];
435         }
436 
set_ascii_text(const char * txt,int sz)437         void set_ascii_text(const char *txt, int sz) {
438             text.resize(sz);
439             for (int i = 0; i < sz; i++) text[i] = txt[i];
440         }
441 
convert_absolute_font_size(std::string & scratch)442         bool convert_absolute_font_size(std::string &scratch) {
443             if (!unit_at || !text_as_ascii_lowercase(scratch)) return false;
444             frozen::string unit(scratch.data() + unit_at, scratch.size() - unit_at);
445             auto lit = absolute_length_units.find(unit);
446             if (lit == absolute_length_units.end()) return false;
447             double val = parse_css_number<std::string>(scratch, unit_at).as_double();
448             double new_val = convert_font_size(val, lit->second);
449             if (val == new_val) return false;
450             char txt[128];
451             // stbsp_snprintf is locale independent unlike std::snprintf
452             int num = stbsp_snprintf(txt, sizeof(txt), "%grem", new_val);
453             if (num <= 0) throw std::runtime_error("Failed to format font size");
454             set_ascii_text(txt, num);
455             return true;
456         }
457 
serialize(std::u32string & out) const458         void serialize(std::u32string &out) const {
459             out.reserve(text.size() + 8);
460             switch (type) {
461                 case TokenType::whitespace:
462                 case TokenType::delimiter:
463                     out.append(text);
464                     break;
465                 case TokenType::ident:
466                     serialize_ident(out);
467                     break;
468                 case TokenType::at_keyword:
469                     out.push_back('@');
470                     serialize_ident(out);
471                     break;
472                 case TokenType::hash:
473                     out.push_back('#');
474                     serialize_hash(out);
475                     break;
476                 case TokenType::string:
477                     serialize_string(out);
478                     break;
479                 case TokenType::url:
480                     out.append({'u', 'r', 'l', '('});
481                     serialize_string(out);
482                     out.push_back(')');
483                     break;
484                 case TokenType::function_start:
485                     serialize_ident(out);
486                     out.push_back('(');
487                     break;
488                 case TokenType::number:
489                 case TokenType::dimension:
490                     out.append(text);
491                     break;
492                 case TokenType::cdo:
493                     out.append({'<', '!', '-', '-'});
494                     break;
495                 case TokenType::cdc:
496                     out.append({'-', '-', '>'});
497                     break;
498             }
499         }
500 
501         friend std::ostream& operator<<(std::ostream& os, const Token& tok);
502 };
503 
operator <<(std::ostream & os,const Token & tok)504 std::ostream& operator<<(std::ostream& os, const Token& tok) {
505     std::u32string rep;
506     std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> cv;
507     tok.serialize(rep);
508     os << cv.to_bytes(rep);
509     return os;
510 }
511 
512 
513 class TokenQueue {
514     private:
515         std::stack<Token> pool;
516         std::vector<Token> queue;
517         std::u32string out;
518 		std::string scratch, scratch2;
519 		pyobject_raii url_callback;
520 
current_output_position() const521 		size_t current_output_position() const { return out.size(); }
522 
new_token(const TokenType type,const char32_t ch=0)523         void new_token(const TokenType type, const char32_t ch = 0) {
524             if (pool.empty()) queue.emplace_back(type, ch, current_output_position());
525             else {
526                 queue.push_back(std::move(pool.top())); pool.pop();
527                 queue.back().set_type(type);
528                 queue.back().set_output_position(current_output_position());
529                 if (ch) queue.back().add_char(ch);
530             }
531         }
532 
add_char_of_type(const TokenType type,const char32_t ch)533         void add_char_of_type(const TokenType type, const char32_t ch) {
534             if (!queue.empty() && queue.back().is_type(type)) queue.back().add_char(ch);
535             else new_token(type, ch);
536         }
537 
return_tokens_to_pool()538 		void return_tokens_to_pool() {
539 			while (queue.size()) {
540 				queue.back().reset();
541 				pool.push(std::move(queue.back()));
542 				queue.pop_back();
543 			}
544 		}
545 
leading_token_of_type(TokenType q) const546 		const Token* leading_token_of_type(TokenType q) const {
547 			for (const auto& tok : queue) {
548 				if (tok.is_significant()) {
549 					return tok.is_type(q) ? &tok : NULL;
550 				}
551 			}
552 			return NULL;
553 		}
554 
leading_token_of_type(TokenType q)555 		Token* leading_token_of_type(TokenType q) {
556 			for (auto& tok : queue) {
557 				if (tok.is_significant()) {
558 					return tok.is_type(q) ? &tok : NULL;
559 				}
560 			}
561 			return NULL;
562 		}
563 
process_urls(const TokenType type=TokenType::url)564 		bool process_urls(const TokenType type = TokenType::url) {
565 			bool changed = false;
566 			if (url_callback) {
567 				for (auto& tok : queue) {
568 					if (tok.is_type(type)) {
569 						pyobject_raii url(tok.get_text_as_python());
570 						pyobject_raii new_url(PyObject_CallFunctionObjArgs(url_callback.ptr(), url.ptr(), NULL));
571 						if (!new_url) { PyErr_Print(); }
572 						else {
573 							if (PyUnicode_Check(new_url.ptr()) && new_url.ptr() != url.ptr()) {
574 								tok.set_text(new_url.ptr());
575 								changed = true;
576 							}
577 						}
578 					}
579 				}
580 			}
581 			return changed;
582 		}
583 
process_declaration()584 		bool process_declaration() {
585 			bool changed = false;
586 			bool colon_found = false, key_found = false, keep_going = true;
587 			std::function<bool(std::vector<Token>::iterator)> process_values;
588 
589 			for (auto it = queue.begin(); keep_going && it < queue.end(); it++) {
590 				if (!it->is_significant()) continue;
591 				if (key_found) {
592 					if (colon_found) {
593 						if (process_values && process_values(it)) changed = true;;
594 					} else {
595 						if (!it->is_delimiter(':')) break;  // no colon found
596 						colon_found = true;
597 					}
598 				} else {
599 					if (it->is_type(TokenType::ident)) {
600 						key_found = true;
601 						if (!it->text_as_ascii_lowercase(scratch)) break; // not a printable ascii property name
602 						frozen::string property_name(scratch.data(), scratch.size());
603 						auto pit = known_properties.find(property_name);
604 						if (pit == known_properties.end()) break; // not a known property
605 						switch(pit->second) {
606 							case PropertyType::font_size:
607 								process_values = std::bind(&TokenQueue::process_font_sizes, this, std::placeholders::_1);
608 								break;
609 							case PropertyType::page_break: {
610 								it->erase_text_substring(0, 5);
611 								size_t pos = std::distance(queue.begin(), it);
612 								std::vector<Token> copies;
613 								copies.reserve(queue.size() + 2);
614 								while (it < queue.end() && !it->is_property_terminator()) { copies.push_back(*(it++)); }
615 								if (copies.size()) {
616 									copies.emplace_back(TokenType::delimiter, ';');
617 									copies.emplace_back(TokenType::whitespace, ' ');
618 									queue.insert(queue.begin() + pos, std::make_move_iterator(copies.begin()), std::make_move_iterator(copies.end()));
619 									size_t idx = pos + copies.size();
620 									queue[idx].prepend(U"-webkit-column-");
621 								}
622 								changed = true; keep_going = false;
623 							}
624 								break;
625 							case PropertyType::non_standard_writing_mode:
626 								it->set_text(U"writing-mode");
627 								changed = true; keep_going = false;
628 								break;
629 						}
630 					} else break;  // no property key found
631 				}
632 			}
633 			return changed;
634 		}
635 
process_font_sizes(std::vector<Token>::iterator it)636 		bool process_font_sizes(std::vector<Token>::iterator it) {
637 			bool changed = false;
638 			for (; it < queue.end(); it++) {
639 				switch (it->get_type()) {
640 					case TokenType::ident:
641 						if (it->text_as_ascii_lowercase(scratch2)) {
642 							frozen::string key(scratch2.data(), scratch2.size());
643 							auto fsm = font_size_keywords.find(key);
644 							if (fsm != font_size_keywords.end()) {
645 								it->set_text(fsm->second);
646                                 it->set_type(TokenType::dimension);
647 								changed = true;
648 							}
649 						}
650 						break;
651 					case TokenType::dimension:
652                         if (it->convert_absolute_font_size(scratch2)) changed = true;
653 						break;
654 					default:
655 						break;
656 				}
657 			}
658 			return changed;
659 		}
660 
661     public:
TokenQueue(const size_t src_sz,PyObject * url_callback_pointer=NULL)662         TokenQueue(const size_t src_sz, PyObject *url_callback_pointer=NULL) :
663 			pool(), queue(), out(), scratch(), scratch2(), url_callback(url_callback_pointer) {
664 				out.reserve(src_sz * 2); scratch.reserve(16); scratch2.reserve(16);
665 				Py_XINCREF(url_callback.ptr());
666 			}
667 
rewind_output()668 		void rewind_output() { out.pop_back(); }
669 
write_to_output(const char32_t what)670         void write_to_output(const char32_t what) { out.push_back(what); }
671 
swap_result_to(std::u32string & result)672 		void swap_result_to(std::u32string &result) { out.swap(result); }
673 
current_token_text_equals_case_insensitive(const char * lowercase_text) const674         bool current_token_text_equals_case_insensitive(const char *lowercase_text) const {
675             if (queue.empty()) return false;
676             return queue.back().text_equals_case_insensitive(lowercase_text);
677         }
678 
add_whitespace(const char32_t ch)679         void add_whitespace(const char32_t ch) { add_char_of_type(TokenType::whitespace, ch); }
680 
start_string()681         void start_string() {
682             if (queue.empty() || !queue.back().is_type(TokenType::string)) new_token(TokenType::string);
683         }
684 
add_char(const char32_t ch)685         void add_char(const char32_t ch) {
686             if (queue.empty()) throw std::logic_error("Attempting to add char to non-existent token");
687             queue.back().add_char(ch);
688         }
689 
make_function_start(bool is_url=false)690         void make_function_start(bool is_url = false) {
691             if (queue.empty()) throw std::logic_error("Attempting to make function start with non-existent token");
692             queue.back().set_type(is_url ? TokenType::url : TokenType::function_start);
693             if (is_url) queue.back().clear_text();
694         }
695 
add_delimiter(const char32_t ch)696         void add_delimiter(const char32_t ch) { new_token(TokenType::delimiter, ch); }
697 
add_hash()698         void add_hash() { new_token(TokenType::hash); }
699 
add_at_keyword()700         void add_at_keyword() { new_token(TokenType::at_keyword); }
701 
add_number(const char32_t ch)702         void add_number(const char32_t ch) { new_token(TokenType::number, ch); }
703 
add_ident(const char32_t ch)704         void add_ident(const char32_t ch) { new_token(TokenType::ident, ch); }
705 
add_cdc()706         void add_cdc() { new_token(TokenType::cdc); }
add_cdo()707         void add_cdo() { new_token(TokenType::cdo); }
708 
mark_unit()709         void mark_unit() {
710             if (queue.empty()) throw std::logic_error("Attempting to mark unit with no token present");
711             queue.back().mark_unit();
712             queue.back().set_type(TokenType::dimension);
713         }
714 
trim_trailing_whitespace()715         void trim_trailing_whitespace() {
716             if (!queue.empty()) queue.back().trim_trailing_whitespace();
717         }
718 
starts_with_at_keyword() const719         bool starts_with_at_keyword() const { return leading_token_of_type(TokenType::at_keyword) != NULL; }
720 
commit_tokens(const char32_t flush_char)721 		void commit_tokens(const char32_t flush_char) {
722 			bool changed = false;
723 			if (flush_char == ';') {
724 				const Token *att = leading_token_of_type(TokenType::at_keyword);
725 				if (process_urls()) changed = true;
726 				if (att) {
727 					if (att->text_equals_case_insensitive("import")) {
728 						if (process_urls(TokenType::string)) changed = true;
729 					}
730 				} else {
731 					if (process_declaration()) changed = true;
732 				}
733 			} else if (flush_char == '{') {
734 				if (process_urls()) changed = true;
735 				const Token *att = leading_token_of_type(TokenType::at_keyword);
736 				if (att && att->text_equals_case_insensitive("import")) {
737 					if (process_urls(TokenType::string)) changed = true;
738 				}
739 			} else {
740 				if (process_urls()) changed = true;
741 				if (process_declaration()) changed = true;
742 			}
743             if (changed && queue.size()) {
744                 const size_t pos = queue[0].get_output_position();
745                 out.resize(pos ? pos - 1: 0);
746                 for (auto tok : queue) tok.serialize(out);
747             }
748 			return_tokens_to_pool();
749 		}
750 };
751 
752 class Parser {
753     private:
754         enum class ParseState : unsigned {
755             normal,
756             escape,
757             comment,
758             string,
759             hash,
760             number,
761             digits,
762             dimension,
763             ident,
764             url, url_start, url_string, url_after_string,
765             at_keyword,
766         };
767 
768         class InputStream { // {{{
769             private:
770                 int kind;
771                 void *data;
772                 const size_t src_sz;
773                 size_t pos;
774 
read(size_t i) const775                 char32_t read(size_t i) const { return PyUnicode_READ(kind, data, i); }
776 
peek_one(size_t at,unsigned * consumed) const777                 char32_t peek_one(size_t at, unsigned *consumed) const {
778                     if (at >= src_sz) { *consumed = 0; return 0; }
779                     *consumed = 1;
780                     char32_t ch = read(at);
781                     if (ch == 0xc) ch = '\n';
782                     else if (ch == '\r') {
783                         ch = '\n';
784                         if (at + 1 < src_sz && read(at + 1) == '\n') *consumed = 2;
785                     }
786                     else if (ch == 0 || is_surrogate(ch)) ch = 0xfffd;
787                     return ch;
788                 }
789 
790             public:
InputStream(PyObject * src)791                 InputStream(PyObject *src) : kind(PyUnicode_KIND(src)), data(PyUnicode_DATA(src)), src_sz(PyUnicode_GET_LENGTH(src)), pos(0) { }
792 
next()793                 char32_t next() {
794                     unsigned last_step_size;
795                     char32_t ans = peek_one(pos, &last_step_size);
796                     pos += last_step_size;
797                     return ans;
798                 }
799 
rewind()800                 void rewind() {
801                     if (!pos) throw std::logic_error("Cannot rewind already at start of stream");
802                     pos -= (read(pos-1) == '\n' && pos >= 2 && read(pos-2) == '\r') ? 2 : 1;
803                 }
804 
peek(unsigned amt=0) const805                 char32_t peek(unsigned amt = 0) const {
806                     char32_t ans = 0;
807                     size_t at = pos;
808                     unsigned consumed;
809                     while(true) {
810                         ans = peek_one(at, &consumed);
811                         if (!amt || !ans) break;
812                         at += consumed;
813                         amt--;
814                     }
815                     return ans;
816                 }
817         }; // end InputStream }}}
818 
819         class BlockTypeFlags : public std::bitset<4> { // {{{
820             enum class Fields : unsigned {
821                 declarations_allowed, qualified_rules_allowed, at_rules_allowed, top_level
822             };
823             public:
BlockTypeFlags(bool declarations_allowed=true,bool qualified_rules_allowed=false,bool at_rules_allowed=false,bool top_level=false)824                 BlockTypeFlags(bool declarations_allowed=true, bool qualified_rules_allowed=false, bool at_rules_allowed=false, bool top_level=false) : std::bitset<4>() {
825                     set((unsigned)Fields::declarations_allowed, declarations_allowed);
826                     set((unsigned)Fields::qualified_rules_allowed, qualified_rules_allowed);
827                     set((unsigned)Fields::at_rules_allowed, at_rules_allowed);
828                     set((unsigned)Fields::top_level, top_level);
829                 }
830 
831 #define PROP(which) \
832                 void set_##which(bool allowed = true) { set((unsigned)Fields::which, allowed); } \
833                 bool which() const { return (*this)[(unsigned)Fields::which]; }
834 
835                 PROP(declarations_allowed)
836                 PROP(qualified_rules_allowed)
837                 PROP(at_rules_allowed)
838                 PROP(top_level)
839 #undef PROP
840         }; // }}}
841 
842         char32_t ch, end_string_with, prev_ch;
843         std::stack<BlockTypeFlags> block_types;
844         std::stack<ParseState> states;
845         char escape_buf[16];
846         unsigned escape_buf_pos;
847         TokenQueue token_queue;
848         InputStream input;
849 
850         // block types {{{
declarations_allowed() const851         bool declarations_allowed() const { return block_types.top().declarations_allowed(); }
qualified_rules_allowed() const852         bool qualified_rules_allowed() const { return block_types.top().qualified_rules_allowed(); }
at_rules_allowed() const853         bool at_rules_allowed() const { return block_types.top().at_rules_allowed(); }
is_top_level() const854         bool is_top_level() const { return block_types.top().top_level(); }
push_block_type(bool declarations_allowed=true,bool qualified_rules_allowed=false,bool at_rules_allowed=false,bool top_level=false)855         void push_block_type(bool declarations_allowed=true, bool qualified_rules_allowed=false, bool at_rules_allowed=false, bool top_level=false) {
856             block_types.emplace(declarations_allowed, qualified_rules_allowed, at_rules_allowed, top_level);
857         }
pop_block_type()858         void pop_block_type() { if (block_types.size() > 1) block_types.pop(); }
859         // }}}
860 
861         // testing stream contents {{{
pop_state()862         void pop_state() { if (states.size() > 1) states.pop(); }
rewind_output()863         void rewind_output() { token_queue.rewind_output(); }
write_to_output(const char32_t what)864         void write_to_output(const char32_t what) { token_queue.write_to_output(what); }
reconsume()865         void reconsume() { input.rewind(); rewind_output(); }
866 
peek(int which=0) const867         char32_t peek(int which = 0) const { return which < 0 ? ch : input.peek(which); }
868 
starting_comment() const869         bool starting_comment() const { return ch == '/' && peek() == '*'; }
870 
starting_string() const871         bool starting_string() const { return ch == '"' || ch == '\''; }
872 
has_valid_escape_next(int offset=0) const873         bool has_valid_escape_next(int offset=0) const {
874             if (peek(offset) != '\\') return false;
875             char32_t second = peek(offset + 1);
876             return second > 0 && second != '\n';
877         }
878 
has_valid_escape() const879         bool has_valid_escape() const { return has_valid_escape_next(-1); }
880 
has_identifier_next(int offset=0) const881         bool has_identifier_next(int offset = 0) const {
882             char32_t first = peek(offset);
883             switch(first) {
884                 case 0:
885                     return false;
886                 case '\\':
887                     return has_valid_escape_next(offset);
888                 case '-': {
889                     char32_t second = peek(offset + 1);
890                     if (is_name_start(second) || second == '-') return true;
891                     if (second == '\\') {
892                         char32_t third = peek(offset + 2);
893                         return third > 0 && third != '\n';
894                     }
895                     return false;
896                 }
897                 default:
898                     return is_name_start(first);
899             }
900         }
901 
has_identifier() const902         bool has_identifier() const { return has_identifier_next(-1); }
903         // }}}
904 
905         // escape {{{
enter_escape_mode()906         void enter_escape_mode() {
907             states.push(ParseState::escape);
908             escape_buf_pos = 0;
909         }
910 
handle_escape()911         void handle_escape() {
912             if (!escape_buf_pos) {
913                 if (ch == '\n') { reconsume(); pop_state(); return; }
914                 if (!is_hex_digit(ch)) {
915                     pop_state();
916                     token_queue.add_char(ch);
917                     return;
918                 }
919                 escape_buf[escape_buf_pos++] = (char)ch;
920                 return;
921             }
922             if (is_hex_digit(ch) && escape_buf_pos < 6) { escape_buf[escape_buf_pos++] = (char)ch; return; }
923             if (is_whitespace(ch)) return;  // a single whitespace character is absorbed into escape
924             reconsume();
925             pop_state();
926             escape_buf[escape_buf_pos] = 0;
927             long kch = strtol(escape_buf, NULL, 16);
928             if (kch > 0 && !is_surrogate(kch)) token_queue.add_char(kch);
929             escape_buf_pos = 0;
930         }
931         // }}}
932 
933         // string {{{
enter_string_mode()934         void enter_string_mode() {
935             states.push(ParseState::string);
936             end_string_with = ch;
937             token_queue.start_string();
938         }
939 
handle_string()940         void handle_string() {
941             if (ch == '\\') {
942                 if (peek() == '\n') input.next();
943                 else enter_escape_mode();
944             }
945             else if (ch == end_string_with) pop_state();
946             else token_queue.add_char(ch);
947         } // }}}
948 
949         // comment {{{
enter_comment_mode()950         void enter_comment_mode() {
951             states.push(ParseState::comment);
952         }
953 
handle_comment()954         void handle_comment() {
955             if (ch == '/' && prev_ch == '*') pop_state();
956         } // }}}
957 
958         // hash {{{
enter_hash_mode()959         void enter_hash_mode() {
960             states.push(ParseState::hash);
961             token_queue.add_hash();
962         }
963 
handle_name()964         void handle_name() {
965             if (is_name(ch)) token_queue.add_char(ch);
966             else if (has_valid_escape()) enter_escape_mode();
967             else if (starting_comment()) enter_comment_mode();
968             else {
969                 reconsume();
970                 pop_state();
971             }
972         }
973 
handle_hash()974         void handle_hash() {
975             handle_name();
976         } // }}}
977 
978         // number {{{
enter_number_mode()979         void enter_number_mode() {
980             states.push(ParseState::number);
981             token_queue.add_number(ch);
982         }
983 
handle_number()984         void handle_number() {
985             if (is_digit(ch) || (ch == '.' && is_digit(peek()))) { token_queue.add_char(ch); return; }
986             if (starting_comment()) { enter_comment_mode(); return; }
987             if ((ch == 'e' || ch == 'E')) {
988                 char32_t next = peek();
989                 if (is_digit(next) || ((next == '+' || next == '-') && is_digit(peek(1)))) {
990                     token_queue.add_char(input.next()); token_queue.add_char(input.next());
991                     pop_state();
992                     enter_digits_mode();
993                     return;
994                 }
995             }
996             reconsume();
997             pop_state();
998             if (has_identifier_next()) { enter_dimension_mode(); }
999         }  // }}}
1000 
1001         // digits {{{
enter_digits_mode()1002         void enter_digits_mode() {
1003             states.push(ParseState::digits);
1004         }
1005 
handle_digits()1006         void handle_digits() {
1007             if (is_digit(ch)) { token_queue.add_char(ch); }
1008             else if (starting_comment()) enter_comment_mode();
1009             else {
1010                 reconsume();
1011                 pop_state();
1012                 if (has_identifier_next()) { enter_dimension_mode(); }
1013             }
1014         } // }}}
1015 
1016         // dimension {{{
enter_dimension_mode()1017         void enter_dimension_mode() {
1018             token_queue.mark_unit();
1019             states.push(ParseState::dimension);
1020         }
1021 
handle_dimension()1022         void handle_dimension() {
1023             if (is_name(ch)) { token_queue.add_char(ch); return; }
1024             if (has_valid_escape()) { enter_escape_mode(); return; }
1025             if (starting_comment()) { enter_comment_mode(); return; }
1026             reconsume();
1027             pop_state();
1028         } // }}}
1029 
1030         // ident {{{
enter_ident_mode(const char32_t starting_ch=0)1031         void enter_ident_mode(const char32_t starting_ch = 0) {
1032             token_queue.add_ident(starting_ch);
1033             states.push(ParseState::ident);
1034         }
1035 
handle_ident()1036         void handle_ident() {
1037             if (is_name(ch)) { token_queue.add_char(ch); return; }
1038             if (has_valid_escape()) { enter_escape_mode(); return; }
1039             if (starting_comment()) { enter_comment_mode(); return; }
1040             pop_state();
1041             if (ch == '(') {
1042                 if (token_queue.current_token_text_equals_case_insensitive("url")) enter_url_start_mode();
1043                 else token_queue.make_function_start();
1044             } else reconsume();
1045         } // }}}
1046 
1047         // url {{{
enter_url_start_mode()1048         void enter_url_start_mode() {
1049             token_queue.make_function_start(true);
1050             states.push(ParseState::url_start);
1051         }
1052 
handle_url_start()1053         void handle_url_start() {
1054             if (is_whitespace(ch)) return;
1055             if (starting_string()) { pop_state(); end_string_with = ch; states.push(ParseState::url_string); return; }
1056             if (ch == ')') { pop_state(); return; }
1057             if (starting_comment()) { enter_comment_mode(); return; }
1058             pop_state(); states.push(ParseState::url);
1059             token_queue.add_char(ch);
1060         }
1061 
handle_url_string()1062         void handle_url_string() {
1063             handle_string();
1064             if (states.top() != ParseState::url_string && states.top() != ParseState::escape) states.push(ParseState::url_after_string);
1065         }
1066 
handle_url_after_string()1067         void handle_url_after_string() {
1068             if (starting_comment()) { enter_comment_mode(); return; }
1069             if (!is_whitespace(ch)) exit_url_mode();
1070         }
1071 
handle_url()1072         void handle_url() {
1073             if (ch == '\\' && has_valid_escape()) enter_escape_mode();
1074             else if (ch == ')') exit_url_mode(true);
1075             else if (starting_comment()) enter_comment_mode();
1076             else token_queue.add_char(ch);
1077         }
1078 
exit_url_mode(bool trim=false)1079         void exit_url_mode(bool trim=false) {
1080             pop_state();
1081             if (trim) token_queue.trim_trailing_whitespace();
1082         }
1083         // }}}
1084 
1085         // at_keyword {{{
enter_at_keyword()1086         void enter_at_keyword() {
1087             states.push(ParseState::at_keyword);
1088             token_queue.add_at_keyword();
1089         }
1090 
handle_at_keyword()1091         void handle_at_keyword() {
1092             handle_name();
1093         } // }}}
1094 
handle_normal()1095         void handle_normal() {
1096             if (starting_comment()) { enter_comment_mode(); return; }
1097             if (is_whitespace(ch)) { token_queue.add_whitespace(ch); return; }
1098             if (is_digit(ch)) { enter_number_mode(); return; }
1099             if (is_name_start(ch)) { enter_ident_mode(ch); return; }
1100             switch (ch) {
1101                 case '"':
1102                 case '\'':
1103                     enter_string_mode();
1104                     break;
1105                 case '#':
1106                     if (is_name(peek()) || has_valid_escape_next()) {
1107                         enter_hash_mode();
1108                     } else token_queue.add_delimiter(ch);
1109                     break;
1110                 case '(':
1111                 case ')':
1112                 case '[':
1113                 case ']':
1114                 case ',':
1115                 case ':':
1116                     token_queue.add_delimiter(ch);
1117                     break;
1118                 case ';':
1119                     token_queue.add_delimiter(ch);
1120 					token_queue.commit_tokens(ch);
1121                     break;
1122 				case '{':
1123                     if (at_rules_allowed() || qualified_rules_allowed()) {
1124                         const bool is_at_rule = token_queue.starts_with_at_keyword();
1125                         push_block_type(true, is_at_rule, is_at_rule);
1126                     }
1127                     token_queue.add_delimiter(ch);
1128 					token_queue.commit_tokens(ch);
1129                     break;
1130 				case '}':
1131                     pop_block_type();
1132                     token_queue.add_delimiter(ch);
1133 					token_queue.commit_tokens(ch);
1134                     break;
1135                 case '+':
1136                     if (is_digit(peek()) || (peek() == '.' && is_digit(peek(1)))) { enter_number_mode(); }
1137                     else token_queue.add_delimiter(ch);
1138                     break;
1139                 case '-':
1140                     if (is_digit(peek()) || (peek() == '.' && is_digit(peek(1)))) { enter_number_mode(); }
1141                     else if (is_top_level() && peek() == '-' && peek(1) == '>') { token_queue.add_cdc(); write_to_output(input.next()); write_to_output(input.next()); }
1142                     else if (has_identifier()) { enter_ident_mode(ch); }
1143                     else token_queue.add_delimiter(ch);
1144                     break;
1145                 case '.':
1146                     if (is_digit(peek())) { enter_number_mode(); }
1147                     else token_queue.add_delimiter(ch);
1148                     break;
1149                 case '<':
1150                     if (is_top_level() && peek() == '!' && peek(1) == '-' && peek(2) == '-') { token_queue.add_cdo(); write_to_output(input.next()); write_to_output(input.next()); }
1151                     else token_queue.add_delimiter(ch);
1152                     break;
1153                 case '@':
1154                     if (at_rules_allowed() && has_identifier_next()) enter_at_keyword();
1155                     else token_queue.add_delimiter(ch);
1156                     break;
1157                 case '\\':
1158                     if (has_valid_escape()) { enter_ident_mode(); enter_escape_mode(); }
1159                     else token_queue.add_delimiter(ch);
1160                     break;
1161                 default:
1162                     token_queue.add_delimiter(ch);
1163                     break;
1164             }
1165         }
1166 
dispatch_current_char()1167         void dispatch_current_char() {
1168             write_to_output(ch);
1169             switch (states.top()) {
1170                 case ParseState::normal:
1171                     handle_normal(); break;
1172                 case ParseState::comment:
1173                     handle_comment(); break;
1174                 case ParseState::escape:
1175                     handle_escape(); break;
1176                 case ParseState::string:
1177                     handle_string(); break;
1178                 case ParseState::hash:
1179                     handle_hash(); break;
1180                 case ParseState::number:
1181                     handle_number(); break;
1182                 case ParseState::digits:
1183                     handle_digits(); break;
1184                 case ParseState::dimension:
1185                     handle_dimension(); break;
1186                 case ParseState::ident:
1187                     handle_ident(); break;
1188                 case ParseState::url_start:
1189                     handle_url_start(); break;
1190                 case ParseState::url_string:
1191                     handle_url_string(); break;
1192                 case ParseState::url:
1193                     handle_url(); break;
1194                 case ParseState::url_after_string:
1195                     handle_url_after_string(); break;
1196                 case ParseState::at_keyword:
1197                     handle_at_keyword(); break;
1198             }
1199             prev_ch = ch;
1200         }
1201 
1202 
1203     public:
Parser(PyObject * src,PyObject * url_callback=NULL,const bool is_declaration=false)1204         Parser(PyObject *src, PyObject *url_callback = NULL, const bool is_declaration = false) :
1205             ch(0), end_string_with('"'), prev_ch(0), block_types(), states(), escape_buf(),
1206             escape_buf_pos(0), token_queue(PyUnicode_GET_LENGTH(src), url_callback), input(src)
1207         {
1208             if (is_declaration) push_block_type(); else push_block_type(true, true, true, true);
1209             states.push(ParseState::normal);
1210         }
1211 
parse(std::u32string & result)1212         void parse(std::u32string &result) {
1213             while (true) {
1214                 ch = input.next();
1215                 if (!ch) break;
1216                 dispatch_current_char();
1217             }
1218             token_queue.commit_tokens(';');
1219 			token_queue.swap_result_to(result);
1220         }
1221 
1222 };
1223 
// Catch clauses that turn a C++ exception into a Python error and a NULL
// return. Place directly after the closing brace of a try block in a function
// returning PyObject*. msg is used when the exception is not a std::exception.
// A python_error sets no new Python exception, only NULL is returned.
#define handle_exceptions(msg) \
    catch (const std::bad_alloc &) { \
        return PyErr_NoMemory(); \
    } catch (const python_error &) { \
        return NULL; \
    } catch (const std::exception &err) { \
        PyErr_SetString(PyExc_Exception, err.what()); \
        return NULL; \
    } catch (...) { \
        PyErr_SetString(PyExc_Exception, msg); \
        return NULL; \
    }
1238 
1239 
1240 static PyObject*
transform_properties(PyObject * src,PyObject * url_callback=NULL,bool is_declaration=false)1241 transform_properties(PyObject *src, PyObject *url_callback = NULL, bool is_declaration = false) {
1242     try {
1243         std::u32string result;
1244         Parser parser(src, url_callback, is_declaration);
1245         parser.parse(result);
1246         return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, result.data(), result.size());
1247     } handle_exceptions("Unknown error while parsing CSS");
1248 }
1249 
1250 static PyObject*
transform_properties_python(PyObject * self,PyObject * args,PyObject * kw)1251 transform_properties_python(PyObject *self, PyObject *args, PyObject *kw) {
1252     static const char* kwlist[] = {"src", "url_callback", "is_declaration", NULL};
1253     PyObject *raw, *url_callback = NULL; int is_declaration = 0;
1254     if (!PyArg_ParseTupleAndKeywords(args, kw, "U|Op", (char**)kwlist, &raw, &url_callback, &is_declaration)) return NULL;
1255     if (url_callback == Py_None) url_callback = NULL;
1256     if (url_callback && !PyCallable_Check(url_callback)) { PyErr_SetString(PyExc_TypeError, "url_callback must be a callable"); return NULL; }
1257     if (PyUnicode_READY(raw) != 0) return NULL;
1258     PyObject *result = transform_properties(raw, url_callback, is_declaration);
1259     return result;
1260 }
1261 
1262 static PyObject*
parse_css_number_python(PyObject * self,PyObject * src)1263 parse_css_number_python(PyObject *self, PyObject *src) {
1264 	if (!PyUnicode_Check(src)) { PyErr_SetString(PyExc_TypeError, "Unicode string required"); return NULL; }
1265     if (PyUnicode_READY(src) != 0) { return NULL; }
1266 	try {
1267 		std::u32string text;
1268 		text.reserve(PyUnicode_GET_LENGTH(src));
1269 		int kind = PyUnicode_KIND(src); void *data = PyUnicode_DATA(src);
1270 		for (Py_ssize_t i = 0; i < PyUnicode_GET_LENGTH(src); i++) text.push_back(PyUnicode_READ(kind, data, i));
1271 
1272 		ParsedNumber ans = parse_css_number<std::u32string>(text);
1273 		if (ans.is_integer) return PyLong_FromLongLong(ans.integer_value);
1274 		return PyFloat_FromDouble(ans.float_value);
1275 	} handle_exceptions("Unknown error while parsing CSS number");
1276 }
1277 
1278 #undef handle_exceptions
1279 static PyMethodDef methods[] = {
1280     {"parse_css_number", parse_css_number_python, METH_O,
1281      "Parse a CSS number from a string"
1282     },
1283     {"transform_properties", (PyCFunction)transform_properties_python, METH_VARARGS | METH_KEYWORDS,
1284      "Transform a CSS stylesheet or declaration"
1285     },
1286     {NULL, NULL, 0, NULL}
1287 };
1288 
1289 static int
exec_module(PyObject * m)1290 exec_module(PyObject *m) {
1291     return 0;
1292 }
1293 
1294 static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} };
1295 
1296 static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT};
1297 
PyInit_fast_css_transform(void)1298 CALIBRE_MODINIT_FUNC PyInit_fast_css_transform(void) {
1299     module_def.m_name     = "fast_css_transform";
1300     module_def.m_doc      = "Fast CSS transformations needed for viewer";
1301     module_def.m_methods  = methods;
1302     module_def.m_slots    = slots;
1303 	return PyModuleDef_Init(&module_def);
1304 }
1305