1--echo # Start of ctype_unescape.inc 2 3SET sql_mode = ''; 4 5# 6# Testing how string literals with backslash and quote-quote are unescaped. 7# The tests assume that single quote (') is used as a delimiter. 8# 9 10# 11# Make sure that the parser really works using the character set we need. 12# We use binary strings to compose strings, to be able to test get malformed 13# sequences, which are possible as a result of mysql_real_escape_string(). 14# The important thing for this test is to make the parser unescape using 15# the client character set, rather than binary. Currently it works exactly 16# that way by default, so the query below should return @@character_set_client 17# 18SET @query=_binary'SELECT CHARSET(\'test\'),@@character_set_client,@@character_set_connection'; 19PREPARE stmt FROM @query; 20EXECUTE stmt; 21DEALLOCATE PREPARE stmt; 22 23let $CHARSET=`SELECT @@character_set_connection`; 24 25CREATE TABLE allbytes (a VARBINARY(10)); 26 27# 28# Create various byte sequences to test. Testing the full banch of 29# possible combinations takes about 2 minutes. So this test provides 30# variants to run with: 31# - the full set of possible combinations 32# - a reduced test of combinations for selected bytes only 33# 34 35# Create selected byte combinations 36if ($ctype_unescape_combinations == 'selected') 37{ 38--echo # Using selected bytes combinations 39--source include/bytes.inc 40# 41# Populate "selected_bytes" with bytes that have a special meaning. 42# We'll use "selected_bytes" to generate byte seqeunces, 43# instead of the full possible byte combinations, to reduce test time. 44# 45CREATE TABLE selected_bytes (a VARBINARY(10)); 46 47# Bytes that have a special meaning in all character sets: 48# 0x00 - mysql_real_escape_string() quotes this to '\0' 49# 0x0D - mysql_real_escape_string() quotes this to '\r' 50# 0x0A - mysql_real_escape_string() quotes this to '\n' 51# 0x1A - mysql_real_escape_string() quotes this to '\Z' 52# 0x08 - mysql_real_escape_string() does not quote this, 53# but '\b' is unescaped to 0x08. 54# 0x09 - mysql_real_escape_string() does not quote this, 55# but '\t' is unescaped to 0x09. 56# 0x30 - '0', as in '\0' 57# 0x5A - 'Z', as in '\Z' 58# 0x62 - 'b', as in '\b' 59# 0x6E - 'n', as in '\n' 60# 0x72 - 't', as in '\r' 61# 0x74 - 't', as in '\t' 62 63INSERT INTO selected_bytes (a) VALUES ('\0'),('\b'),('\t'),('\r'),('\n'),('\Z'); 64INSERT INTO selected_bytes (a) VALUES ('0'),('b'),('t'),('r'),('n'),('Z'); 65 66# 0x22 - double quote 67# 0x25 - percent sign, '\%' is preserved as is for LIKE. 68# 0x27 - single quote 69# 0x5C - backslash 70# 0x5F - underscore, '\_' is preserved as is for LIKE. 71INSERT INTO selected_bytes (a) VALUES ('\\'),('_'),('%'),(0x22),(0x27); 72 73# Some bytes do not have any special meaning, for example basic Latin letters. 74# Let's add, one should be enough for a good enough coverage. 75INSERT INTO selected_bytes (a) VALUES ('a'); 76 77# 78# This maps summarizes bytes that have a special 79# meaning in various character sets: 80# 81# MBHEAD MBTAIL NONASCII-8BIT BAD 82# ------ ------ -------------- ---------- 83# big5: [A1..F9] [40..7E,A1..FE] N/A [80..A0,FA..FF] 84# cp932: [81..9F,E0..FC] [40..7E,80..FC] [A1..DF] [FD..FF] 85# gbk: [81..FE] [40..7E,80..FE] N/A [FF] 86# sjis: [81..9F,E0..FC] [40..7E,80..FC] [A1..DF] [FD..FF] 87# swe7: N/A N/A [5B..5E,7B..7E] [80..FF] 88# 89 90INSERT INTO selected_bytes (a) VALUES 91(0x3F), # 7bit 92(0x40), # 7bit mbtail 93(0x7E), # 7bit mbtail nonascii-8bit 94(0x7F), # 7bit nonascii-8bit 95(0x80), # mbtail bad-mb 96(0x81), # mbhead mbtail 97(0x9F), # mbhead mbtail bad-mb 98(0xA0), # mbhead mbtail bad-mb 99(0xA1), # mbhead mbtail nonascii-8bit 100(0xE0), # mbhead mbtai 101(0xEF), # mbhead mbtail 102(0xF9), # mbhead mbtail 103(0xFA), # mbhead mbtail bad-mb 104(0xFC), # mbhead mbtail bad-mb 105(0xFD), # mbhead mbtail bad-mb 106(0xFE), # mbhead mbtial bad-mb 107(0xFF); # bad-mb 108 109# 110# Now populate the test table 111# 112 113# Use all single bytes, this is cheap, there are only 256 values. 114INSERT INTO allbytes (a) SELECT a FROM bytes; 115 116# Add selected bytes combinations 117INSERT INTO allbytes (a) SELECT CONCAT(t1.a,t2.a) FROM selected_bytes t1,selected_bytes t2; 118INSERT INTO allbytes (a) SELECT CONCAT(0x5C,t1.a,t2.a) FROM selected_bytes t1,selected_bytes t2; 119INSERT INTO allbytes (a) SELECT CONCAT(0x5C,t1.a,0x5C,t2.a) FROM selected_bytes t1,selected_bytes t2; 120DROP TABLE selected_bytes; 121 122# Delete all non-single byte sequences that do not have 123# backslashes or quotes at all. There is nothing special with these strings. 124DELETE FROM allbytes WHERE 125 OCTET_LENGTH(a)>1 AND 126 LOCATE(0x5C,a)=0 AND 127 a NOT LIKE '%\'%' AND 128 a NOT LIKE '%"%'; 129 130} 131 132if ($ctype_unescape_combinations=='') 133{ 134--echo # Using full byte combinations 135--source include/bytes2.inc 136INSERT INTO allbytes (a) SELECT a FROM bytes; 137INSERT INTO allbytes (a) SELECT CONCAT(hi,lo) FROM bytes2; 138INSERT INTO allbytes (a) SELECT CONCAT(0x5C,hi,lo) FROM bytes2; 139INSERT INTO allbytes (a) SELECT CONCAT(0x5C,hi,0x5C,lo) FROM bytes2; 140} 141 142 143DELIMITER //; 144 145# 146# A procedure that make an SQL query using 'val' as a string literal. 147# The result of the query execution is written into the table 't1'. 148# NULL in t1.b means that query failed due to syntax error, 149# typically because of mis-interpreted closing quote delimiter. 150# 151CREATE PROCEDURE p1(val VARBINARY(10)) 152BEGIN 153 DECLARE EXIT HANDLER FOR SQLSTATE '42000' INSERT INTO t1 (a,b) VALUES(val,NULL); 154 SET @query=CONCAT(_binary"INSERT INTO t1 (a,b) VALUES (0x",HEX(val),",'",val,"')"); 155 PREPARE stmt FROM @query; 156 EXECUTE stmt; 157 DEALLOCATE PREPARE stmt; 158END// 159 160# 161# A procedure that iterates through all records in "allbytes". 162# And runs p1() for every record. 163# 164CREATE PROCEDURE p2() 165BEGIN 166 DECLARE val VARBINARY(10); 167 DECLARE done INT DEFAULT FALSE; 168 DECLARE stmt CURSOR FOR SELECT a FROM allbytes; 169 DECLARE CONTINUE HANDLER FOR NOT FOUND SET done=TRUE; 170 OPEN stmt; 171read_loop1: LOOP 172 FETCH stmt INTO val; 173 IF done THEN 174 LEAVE read_loop1; 175 END IF; 176 CALL p1(val); 177END LOOP; 178 CLOSE stmt; 179END// 180 181 182# A function that converts the value from binary to $CHARSET 183# and check if it has changed. CONVERT() fixes malformed strings. 184# So if the string changes in CONVERT(), it means it was not wellformed. 185--eval CREATE FUNCTION iswellformed(a VARBINARY(256)) RETURNS INT RETURN a=BINARY CONVERT(a USING $CHARSET); 186 187# 188# A function that approximately reproduces how the SQL parser 189# would unescape a binary string. 190# 191CREATE FUNCTION unescape(a VARBINARY(256)) RETURNS VARBINARY(256) 192BEGIN 193 # We need to do it in a way to avoid producing new escape sequences 194 # First, enclose all known escsape sequences to '{{xx}}' 195 # - Backslash not followed by a LIKE pattern characters _ and % 196 # - Double escapes 197 # This uses PCRE Branch Reset Groups: (?|(alt1)|(alt2)|(alt3)). 198 # So '\\1' in the last argument always means the match, no matter 199 # which alternative it came from. 200 SET a=REGEXP_REPLACE(a,'(?|(\\\\[^_%])|(\\x{27}\\x{27}))','{{\\1}}'); 201 # Now unescape all enclosed standard escape sequences 202 SET a=REPLACE(a,'{{\\0}}', '\0'); 203 SET a=REPLACE(a,'{{\\b}}', '\b'); 204 SET a=REPLACE(a,'{{\\t}}', '\t'); 205 SET a=REPLACE(a,'{{\\r}}', '\r'); 206 SET a=REPLACE(a,'{{\\n}}', '\n'); 207 SET a=REPLACE(a,'{{\\Z}}', '\Z'); 208 SET a=REPLACE(a,'{{\\\'}}', '\''); 209 # Unescape double quotes 210 SET a=REPLACE(a,'{{\'\'}}', '\''); 211 # Unescape the rest: all other \x sequences mean just 'x' 212 SET a=REGEXP_REPLACE(a, '{{\\\\(.|\\R)}}', '\\1'); 213 RETURN a; 214END// 215 216 217# 218# A function that checks what happened during unescaping. 219# 220# @param a - the value before unescaping 221# @param b - the value after unescaping 222# 223# The following return values are possible: 224# - SyntErr - b IS NULL, which means syntax error happened in p1(). 225# - Preserv - the value was not modified during unescaping. 226# This is possible if 0x5C was treated as mbtail. 227# Or only LIKE escape sequences were found: '\_' and '\%'. 228# - Trivial - only 0x5C were removed. 229# - Regular - the value was unescaped like a binary string. 230# Some standard escape sequences were found. 231# No special multi-byte handling happened. 232# - Special - Something else happened. Should not happen. 233# 234CREATE FUNCTION unescape_type(a VARBINARY(256),b VARBINARY(256)) RETURNS VARBINARY(256) 235BEGIN 236 RETURN CASE 237 WHEN b IS NULL THEN '[SyntErr]' 238 WHEN a=b THEN CASE 239 WHEN OCTET_LENGTH(a)=1 THEN '[Preserve]' 240 WHEN a RLIKE '\\\\[_%]' THEN '[Preserve][LIKE]' 241 WHEN a RLIKE '^[[:ascii:]]+$' THEN '[Preserve][ASCII]' 242 ELSE '[Preserv][MB]' END 243 WHEN REPLACE(a,0x5C,'')=b THEN '[Trivial]' 244 WHEN UNESCAPE(a)=b THEN '[Regular]' 245 ELSE '[Special]' END; 246END// 247 248 249# 250# Check what happened with wellformedness during unescaping 251# @param a - the value before unescaping 252# @param b - the value after unescaping 253# 254# Returned values: 255# [FIXED] - the value was malformed and become wellformed after unescaping 256# [BROKE] - the value was wellformed and become malformed after unescaping 257# [ILSEQ] - both values (before unescaping and after unescaping) are malformed 258# '' - both values are wellformed 259# 260CREATE FUNCTION wellformedness(a VARBINARY(256), b VARBINARY(256)) 261 RETURNS VARBINARY(256) 262BEGIN 263 RETURN CASE 264 WHEN b IS NULL THEN '' 265 WHEN NOT iswellformed(a) AND iswellformed(b) THEN '[FIXED]' 266 WHEN iswellformed(a) AND NOT iswellformed(b) THEN '[BROKE]' 267 WHEN NOT iswellformed(a) AND NOT iswellformed(b) THEN '[ILSEQ]' 268 ELSE '' 269 END; 270END// 271 272 273# 274# Check if the value could be generated by mysql_real_escape_string(), 275# or can only come from a direct user input. 276# 277# @param a - the value before unescaping 278# 279# Returns: 280# [USER] - if the value could not be generated by mysql_real_escape_string() 281# '' - if the value was possibly generated by mysql_real_escape_string() 282# 283# 284CREATE FUNCTION mysql_real_escape_string_generated(a VARBINARY(256)) 285 RETURNS VARBINARY(256) 286BEGIN 287 DECLARE a1 BINARY(1) DEFAULT SUBSTR(a,1,1); 288 DECLARE a2 BINARY(1) DEFAULT SUBSTR(a,2,1); 289 DECLARE a3 BINARY(1) DEFAULT SUBSTR(a,3,1); 290 DECLARE a4 BINARY(1) DEFAULT SUBSTR(a,4,1); 291 DECLARE a2a4 BINARY(2) DEFAULT CONCAT(a2,a4); 292 RETURN CASE 293 WHEN (a1=0x5C) AND 294 (a3=0x5C) AND 295 (a2>0x7F) AND 296 (a4 NOT IN ('_','%','0','t','r','n','Z')) AND 297 iswellformed(a2a4) THEN '[USER]' 298 ELSE '' 299 END; 300END// 301 302DELIMITER ;// 303 304 305CREATE TABLE t1 (a VARBINARY(10),b VARBINARY(10)); 306CALL p2(); 307# Avoid "Invalid XXX character string" warnings 308# We mark malformed strings in the output anyway 309--disable_warnings 310# All records marked with '[BAD]' mean that the string was unescaped 311# in a unexpected way, that means there is a bug in UNESCAPE() above. 312SELECT HEX(a),HEX(b), 313 CONCAT(unescape_type(a,b), 314 wellformedness(a,b), 315 mysql_real_escape_string_generated(a), 316 IF(UNESCAPE(a)<>b,CONCAT('[BAD',HEX(UNESCAPE(a)),']'),'')) AS comment 317FROM t1 ORDER BY LENGTH(a),a; 318--enable_warnings 319DROP TABLE t1; 320DROP PROCEDURE p1; 321DROP PROCEDURE p2; 322DROP FUNCTION unescape; 323DROP FUNCTION unescape_type; 324DROP FUNCTION wellformedness; 325DROP FUNCTION mysql_real_escape_string_generated; 326DROP FUNCTION iswellformed; 327DROP TABLE allbytes; 328SET sql_mode = DEFAULT; 329 330--echo # End of ctype_backslash.inc 331