1 /* dquote.c 2 * 3 * This file contains functions that are related to 4 * parsing double-quotish expressions. 5 * 6 */ 7 8 #include "EXTERN.h" 9 #define PERL_IN_DQUOTE_C 10 #include "perl.h" 11 #include "dquote_inline.h" 12 13 /* XXX Add documentation after final interface and behavior is decided */ 14 /* May want to show context for error, so would pass S_grok_bslash_c(pTHX_ const char* current, const char* start, const bool output_warning) 15 U8 source = *current; 16 */ 17 18 char 19 Perl_grok_bslash_c(pTHX_ const char source, const bool output_warning) 20 { 21 22 U8 result; 23 24 if (! isPRINT_A(source)) { 25 Perl_croak(aTHX_ "%s", 26 "Character following \"\\c\" must be printable ASCII"); 27 } 28 else if (source == '{') { 29 const char control = toCTRL('{'); 30 if (isPRINT_A(control)) { 31 /* diag_listed_as: Use "%s" instead of "%s" */ 32 Perl_croak(aTHX_ "Use \"%c\" instead of \"\\c{\"", control); 33 } 34 else { 35 Perl_croak(aTHX_ "Sequence \"\\c{\" invalid"); 36 } 37 } 38 39 result = toCTRL(source); 40 if (output_warning && isPRINT_A(result)) { 41 U8 clearer[3]; 42 U8 i = 0; 43 if (! isWORDCHAR(result)) { 44 clearer[i++] = '\\'; 45 } 46 clearer[i++] = result; 47 clearer[i++] = '\0'; 48 49 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), 50 "\"\\c%c\" is more clearly written simply as \"%s\"", 51 source, 52 clearer); 53 } 54 55 return result; 56 } 57 58 bool 59 Perl_grok_bslash_o(pTHX_ char **s, const char * const send, UV *uv, 60 const char** error_msg, 61 const bool output_warning, const bool strict, 62 const bool silence_non_portable, 63 const bool UTF) 64 { 65 66 /* Documentation to be supplied when interface nailed down finally 67 * This returns FALSE if there is an error which the caller need not recover 68 * from; otherwise TRUE. In either case the caller should look at *len [???]. 69 * It guarantees that the returned codepoint, *uv, when expressed as 70 * utf8 bytes, would fit within the skipped "\o{...}" bytes. 71 * On input: 72 * s is the address of a pointer to a string. **s is 'o', and the 73 * previous character was a backslash. At exit, *s will be advanced 74 * to the byte just after those absorbed by this function. Hence the 75 * caller can continue parsing from there. In the case of an error, 76 * this routine has generally positioned *s to point just to the right 77 * of the first bad spot, so that a message that has a "<--" to mark 78 * the spot will be correctly positioned. 79 * send - 1 gives a limit in *s that this function is not permitted to 80 * look beyond. That is, the function may look at bytes only in the 81 * range *s..send-1 82 * uv points to a UV that will hold the output value, valid only if the 83 * return from the function is TRUE 84 * error_msg is a pointer that will be set to an internal buffer giving an 85 * error message upon failure (the return is FALSE). Untouched if 86 * function succeeds 87 * output_warning says whether to output any warning messages, or suppress 88 * them 89 * strict is true if this should fail instead of warn if there are 90 * non-octal digits within the braces 91 * silence_non_portable is true if to suppress warnings about the code 92 * point returned being too large to fit on all platforms. 93 * UTF is true iff the string *s is encoded in UTF-8. 94 */ 95 char* e; 96 STRLEN numbers_len; 97 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES 98 | PERL_SCAN_DISALLOW_PREFIX 99 /* XXX Until the message is improved in grok_oct, handle errors 100 * ourselves */ 101 | PERL_SCAN_SILENT_ILLDIGIT; 102 103 PERL_ARGS_ASSERT_GROK_BSLASH_O; 104 105 assert(*(*s - 1) == '\\'); 106 assert(* *s == 'o'); 107 (*s)++; 108 109 if (send <= *s || **s != '{') { 110 *error_msg = "Missing braces on \\o{}"; 111 return FALSE; 112 } 113 114 e = (char *) memchr(*s, '}', send - *s); 115 if (!e) { 116 (*s)++; /* Move past the '{' */ 117 while (isOCTAL(**s)) { /* Position beyond the legal digits */ 118 (*s)++; 119 } 120 *error_msg = "Missing right brace on \\o{"; 121 return FALSE; 122 } 123 124 (*s)++; /* Point to expected first digit (could be first byte of utf8 125 sequence if not a digit) */ 126 numbers_len = e - *s; 127 if (numbers_len == 0) { 128 (*s)++; /* Move past the } */ 129 *error_msg = "Empty \\o{}"; 130 return FALSE; 131 } 132 133 if (silence_non_portable) { 134 flags |= PERL_SCAN_SILENT_NON_PORTABLE; 135 } 136 137 *uv = grok_oct(*s, &numbers_len, &flags, NULL); 138 /* Note that if has non-octal, will ignore everything starting with that up 139 * to the '}' */ 140 141 if (numbers_len != (STRLEN) (e - *s)) { 142 if (strict) { 143 *s += numbers_len; 144 *s += (UTF) ? UTF8_SAFE_SKIP(*s, send) : 1; 145 *error_msg = "Non-octal character"; 146 return FALSE; 147 } 148 else if (output_warning) { 149 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT), 150 /* diag_listed_as: Non-octal character '%c'. Resolved as "%s" */ 151 "Non-octal character '%c'. Resolved as \"\\o{%.*s}\"", 152 *(*s + numbers_len), 153 (int) numbers_len, 154 *s); 155 } 156 } 157 158 /* Return past the '}' */ 159 *s = e + 1; 160 161 return TRUE; 162 } 163 164 bool 165 Perl_grok_bslash_x(pTHX_ char **s, const char * const send, UV *uv, 166 const char** error_msg, 167 const bool output_warning, const bool strict, 168 const bool silence_non_portable, 169 const bool UTF) 170 { 171 172 /* Documentation to be supplied when interface nailed down finally 173 * This returns FALSE if there is an error which the caller need not recover 174 * from; otherwise TRUE. 175 * It guarantees that the returned codepoint, *uv, when expressed as 176 * utf8 bytes, would fit within the skipped "\x{...}" bytes. 177 * 178 * On input: 179 * s is the address of a pointer to a string. **s is 'x', and the 180 * previous character was a backslash. At exit, *s will be advanced 181 * to the byte just after those absorbed by this function. Hence the 182 * caller can continue parsing from there. In the case of an error, 183 * this routine has generally positioned *s to point just to the right 184 * of the first bad spot, so that a message that has a "<--" to mark 185 * the spot will be correctly positioned. 186 * send - 1 gives a limit in *s that this function is not permitted to 187 * look beyond. That is, the function may look at bytes only in the 188 * range *s..send-1 189 * uv points to a UV that will hold the output value, valid only if the 190 * return from the function is TRUE 191 * error_msg is a pointer that will be set to an internal buffer giving an 192 * error message upon failure (the return is FALSE). Untouched if 193 * function succeeds 194 * output_warning says whether to output any warning messages, or suppress 195 * them 196 * strict is true if anything out of the ordinary should cause this to 197 * fail instead of warn or be silent. For example, it requires 198 * exactly 2 digits following the \x (when there are no braces). 199 * 3 digits could be a mistake, so is forbidden in this mode. 200 * silence_non_portable is true if to suppress warnings about the code 201 * point returned being too large to fit on all platforms. 202 * UTF is true iff the string *s is encoded in UTF-8. 203 */ 204 char* e; 205 STRLEN numbers_len; 206 I32 flags = PERL_SCAN_DISALLOW_PREFIX; 207 208 209 PERL_ARGS_ASSERT_GROK_BSLASH_X; 210 211 assert(*(*s - 1) == '\\'); 212 assert(* *s == 'x'); 213 214 (*s)++; 215 216 if (send <= *s) { 217 if (strict) { 218 *error_msg = "Empty \\x"; 219 return FALSE; 220 } 221 222 /* Sadly, to preserve backcompat, an empty \x at the end of string is 223 * interpreted as a NUL */ 224 *uv = 0; 225 return TRUE; 226 } 227 228 if (strict || ! output_warning) { 229 flags |= PERL_SCAN_SILENT_ILLDIGIT; 230 } 231 232 if (**s != '{') { 233 STRLEN len = (strict) ? 3 : 2; 234 235 *uv = grok_hex(*s, &len, &flags, NULL); 236 *s += len; 237 if (strict && len != 2) { 238 if (len < 2) { 239 *s += (UTF) ? UTF8_SAFE_SKIP(*s, send) : 1; 240 *error_msg = "Non-hex character"; 241 } 242 else { 243 *error_msg = "Use \\x{...} for more than two hex characters"; 244 } 245 return FALSE; 246 } 247 return TRUE; 248 } 249 250 e = (char *) memchr(*s, '}', send - *s); 251 if (!e) { 252 (*s)++; /* Move past the '{' */ 253 while (isXDIGIT(**s)) { /* Position beyond the legal digits */ 254 (*s)++; 255 } 256 /* XXX The corresponding message above for \o is just '\\o{'; other 257 * messages for other constructs include the '}', so are inconsistent. 258 */ 259 *error_msg = "Missing right brace on \\x{}"; 260 return FALSE; 261 } 262 263 (*s)++; /* Point to expected first digit (could be first byte of utf8 264 sequence if not a digit) */ 265 numbers_len = e - *s; 266 if (numbers_len == 0) { 267 if (strict) { 268 (*s)++; /* Move past the } */ 269 *error_msg = "Empty \\x{}"; 270 return FALSE; 271 } 272 *s = e + 1; 273 *uv = 0; 274 return TRUE; 275 } 276 277 flags |= PERL_SCAN_ALLOW_UNDERSCORES; 278 if (silence_non_portable) { 279 flags |= PERL_SCAN_SILENT_NON_PORTABLE; 280 } 281 282 *uv = grok_hex(*s, &numbers_len, &flags, NULL); 283 /* Note that if has non-hex, will ignore everything starting with that up 284 * to the '}' */ 285 286 if (strict && numbers_len != (STRLEN) (e - *s)) { 287 *s += numbers_len; 288 *s += (UTF) ? UTF8_SAFE_SKIP(*s, send) : 1; 289 *error_msg = "Non-hex character"; 290 return FALSE; 291 } 292 293 /* Return past the '}' */ 294 *s = e + 1; 295 296 return TRUE; 297 } 298 299 /* 300 * ex: set ts=8 sts=4 sw=4 et: 301 */ 302