1"""This module translates JS flow into PY flow. 2 3Translates: 4IF ELSE 5 6DO WHILE 7WHILE 8FOR 123 9FOR iter 10CONTINUE, BREAK, RETURN, LABEL, THROW, TRY, SWITCH 11""" 12from __future__ import print_function 13 14from utils import * 15from jsparser import * 16from nodevisitor import exp_translator 17import random 18 19TO_REGISTER = [] 20CONTINUE_LABEL = 'JS_CONTINUE_LABEL_%s' 21BREAK_LABEL = 'JS_BREAK_LABEL_%s' 22 23PREPARE = '''HOLDER = var.own.get(NAME)\nvar.force_own_put(NAME, PyExceptionToJs(PyJsTempException))\n''' 24RESTORE = '''if HOLDER is not None:\n var.own[NAME] = HOLDER\nelse:\n del var.own[NAME]\ndel HOLDER\n''' 25TRY_CATCH = '''%stry:\nBLOCKfinally:\n%s''' % (PREPARE, indent(RESTORE)) 26 27 28def get_continue_label(label): 29 return CONTINUE_LABEL % label.encode('hex') 30 31 32def get_break_label(label): 33 return BREAK_LABEL % label.encode('hex') 34 35 36def pass_until(source, start, tokens=(';', )): 37 while start < len(source) and source[start] not in tokens: 38 start += 1 39 return start + 1 40 41 42def do_bracket_exp(source, start, throw=True): 43 bra, cand = pass_bracket(source, start, '()') 44 if throw and not bra: 45 raise SyntaxError('Missing bracket expression') 46 bra = exp_translator(bra[1:-1]) 47 if throw and not bra: 48 raise SyntaxError('Empty bracket condition') 49 return bra, cand if bra else start 50 51 52def do_if(source, start): 53 start += 2 # pass this if 54 bra, start = do_bracket_exp(source, start, throw=True) 55 statement, start = do_statement(source, start) 56 if statement is None: 57 raise SyntaxError('Invalid if statement') 58 translated = 'if %s:\n' % bra + indent(statement) 59 60 elseif = except_keyword(source, start, 'else') 61 is_elseif = False 62 if elseif: 63 start = elseif 64 if except_keyword(source, start, 'if'): 65 is_elseif = True 66 elseif, start = do_statement(source, start) 67 if elseif is None: 68 raise SyntaxError('Invalid if statement)') 69 if is_elseif: 70 translated += 'el' + elseif 71 else: 72 translated += 'else:\n' + indent(elseif) 73 return translated, start 74 75 76def do_statement(source, start): 77 """returns none if not found other functions that begin with 'do_' raise 78 also this do_ type function passes white space""" 79 start = pass_white(source, start) 80 # start is the fist position after initial start that is not a white space or \n 81 if not start < len(source): #if finished parsing return None 82 return None, start 83 if any(startswith_keyword(source[start:], e) for e in {'case', 'default'}): 84 return None, start 85 rest = source[start:] 86 for key, meth in KEYWORD_METHODS.iteritems( 87 ): # check for statements that are uniquely defined by their keywords 88 if rest.startswith(key): 89 # has to startwith this keyword and the next letter after keyword must be either EOF or not in IDENTIFIER_PART 90 if len(key) == len(rest) or rest[len(key)] not in IDENTIFIER_PART: 91 return meth(source, start) 92 if rest[0] == '{': #Block 93 return do_block(source, start) 94 # Now only label and expression left 95 cand = parse_identifier(source, start, False) 96 if cand is not None: # it can mean that its a label 97 label, cand_start = cand 98 cand_start = pass_white(source, cand_start) 99 if source[cand_start] == ':': 100 return do_label(source, start) 101 return do_expression(source, start) 102 103 104def do_while(source, start): 105 start += 5 # pass while 106 bra, start = do_bracket_exp(source, start, throw=True) 107 statement, start = do_statement(source, start) 108 if statement is None: 109 raise SyntaxError('Missing statement to execute in while loop!') 110 return 'while %s:\n' % bra + indent(statement), start 111 112 113def do_dowhile(source, start): 114 start += 2 # pass do 115 statement, start = do_statement(source, start) 116 if statement is None: 117 raise SyntaxError('Missing statement to execute in do while loop!') 118 start = except_keyword(source, start, 'while') 119 if not start: 120 raise SyntaxError('Missing while keyword in do-while loop') 121 bra, start = do_bracket_exp(source, start, throw=True) 122 statement += 'if not %s:\n' % bra + indent('break\n') 123 return 'while 1:\n' + indent(statement), start 124 125 126def do_block(source, start): 127 bra, start = pass_bracket(source, start, '{}') 128 #print source[start:], bra 129 #return bra +'\n', start 130 if bra is None: 131 raise SyntaxError('Missing block ( {code} )') 132 code = '' 133 bra = bra[1:-1] + ';' 134 bra_pos = 0 135 while bra_pos < len(bra): 136 st, bra_pos = do_statement(bra, bra_pos) 137 if st is None: 138 break 139 code += st 140 bra_pos = pass_white(bra, bra_pos) 141 if bra_pos < len(bra): 142 raise SyntaxError('Block has more code that could not be parsed:\n' + 143 bra[bra_pos:]) 144 return code, start 145 146 147def do_empty(source, start): 148 return 'pass\n', start + 1 149 150 151def do_expression(source, start): 152 start = pass_white(source, start) 153 end = pass_until(source, start, tokens=(';', )) 154 if end == start + 1: #empty statement 155 return 'pass\n', end 156 # AUTOMATIC SEMICOLON INSERTION FOLLOWS 157 # Without ASI this function would end with: return exp_translator(source[start:end].rstrip(';'))+'\n', end 158 # ASI makes things a bit more complicated: 159 # we will try to parse as much as possible, inserting ; in place of last new line in case of error 160 rev = False 161 rpos = 0 162 while True: 163 try: 164 code = source[start:end].rstrip(';') 165 cand = exp_translator(code) + '\n', end 166 just_to_test = compile(cand[0], '', 'exec') 167 return cand 168 except Exception as e: 169 if not rev: 170 rev = source[start:end][::-1] 171 lpos = rpos 172 while True: 173 rpos = pass_until(rev, rpos, LINE_TERMINATOR) 174 if rpos >= len(rev): 175 raise 176 if filter(lambda x: x not in SPACE, rev[lpos:rpos]): 177 break 178 end = start + len(rev) - rpos + 1 179 180 181def do_var(source, start): 182 #todo auto ; insertion 183 start += 3 #pass var 184 end = pass_until(source, start, tokens=(';', )) 185 defs = argsplit( 186 source[start:end - 1] 187 ) # defs is the list of defined vars with optional initializer 188 code = '' 189 for de in defs: 190 var, var_end = parse_identifier(de, 0, True) 191 TO_REGISTER.append(var) 192 var_end = pass_white(de, var_end) 193 if var_end < len( 194 de 195 ): # we have something more to parse... It has to start with = 196 if de[var_end] != '=': 197 raise SyntaxError( 198 'Unexpected initializer in var statement. Expected "=", got "%s"' 199 % de[var_end]) 200 code += exp_translator(de) + '\n' 201 if not code.strip(): 202 code = 'pass\n' 203 return code, end 204 205 206def do_label(source, start): 207 label, end = parse_identifier(source, start) 208 end = pass_white(source, end) 209 #now source[end] must be : 210 assert source[end] == ':' 211 end += 1 212 inside, end = do_statement(source, end) 213 if inside is None: 214 raise SyntaxError('Missing statement after label') 215 defs = '' 216 if inside.startswith('while ') or inside.startswith( 217 'for ') or inside.startswith('#for'): 218 # we have to add contine label as well... 219 # 3 or 1 since #for loop type has more lines before real for. 220 sep = 1 if not inside.startswith('#for') else 3 221 cont_label = get_continue_label(label) 222 temp = inside.split('\n') 223 injected = 'try:\n' + '\n'.join(temp[sep:]) 224 injected += 'except %s:\n pass\n' % cont_label 225 inside = '\n'.join(temp[:sep]) + '\n' + indent(injected) 226 defs += 'class %s(Exception): pass\n' % cont_label 227 break_label = get_break_label(label) 228 inside = 'try:\n%sexcept %s:\n pass\n' % (indent(inside), break_label) 229 defs += 'class %s(Exception): pass\n' % break_label 230 return defs + inside, end 231 232 233def do_for(source, start): 234 start += 3 # pass for 235 entered = start 236 bra, start = pass_bracket(source, start, '()') 237 inside, start = do_statement(source, start) 238 if inside is None: 239 raise SyntaxError('Missing statement after for') 240 bra = bra[1:-1] 241 if ';' in bra: 242 init = argsplit(bra, ';') 243 if len(init) != 3: 244 raise SyntaxError('Invalid for statement') 245 args = [] 246 for i, item in enumerate(init): 247 end = pass_white(item, 0) 248 if end == len(item): 249 args.append('' if i != 1 else '1') 250 continue 251 if not i and except_keyword(item, end, 'var') is not None: 252 # var statement 253 args.append(do_var(item, end)[0]) 254 continue 255 args.append(do_expression(item, end)[0]) 256 return '#for JS loop\n%swhile %s:\n%s%s\n' % ( 257 args[0], args[1].strip(), indent(inside), indent(args[2])), start 258 # iteration 259 end = pass_white(bra, 0) 260 register = False 261 if bra[end:].startswith('var '): 262 end += 3 263 end = pass_white(bra, end) 264 register = True 265 name, end = parse_identifier(bra, end) 266 if register: 267 TO_REGISTER.append(name) 268 end = pass_white(bra, end) 269 if bra[end:end + 2] != 'in' or bra[end + 2] in IDENTIFIER_PART: 270 #print source[entered-10:entered+50] 271 raise SyntaxError('Invalid "for x in y" statement') 272 end += 2 # pass in 273 exp = exp_translator(bra[end:]) 274 res = 'for temp in %s:\n' % exp 275 res += indent('var.put(%s, temp)\n' % name.__repr__()) + indent(inside) 276 return res, start 277 278 279# todo - IMPORTANT 280def do_continue(source, start, name='continue'): 281 start += len(name) #pass continue 282 start = pass_white(source, start) 283 if start < len(source) and source[start] == ';': 284 return '%s\n' % name, start + 1 285 # labeled statement or error 286 label, start = parse_identifier(source, start) 287 start = pass_white(source, start) 288 if start < len(source) and source[start] != ';': 289 raise SyntaxError('Missing ; after label name in %s statement' % name) 290 return 'raise %s("%s")\n' % (get_continue_label(label) 291 if name == 'continue' else 292 get_break_label(label), name), start + 1 293 294 295def do_break(source, start): 296 return do_continue(source, start, 'break') 297 298 299def do_return(source, start): 300 start += 6 # pass return 301 end = source.find(';', start) + 1 302 if end == -1: 303 end = len(source) 304 trans = exp_translator(source[start:end].rstrip(';')) 305 return 'return %s\n' % (trans if trans else "var.get('undefined')"), end 306 307 308# todo later?- Also important 309def do_throw(source, start): 310 start += 5 # pass throw 311 end = source.find(';', start) + 1 312 if not end: 313 end = len(source) 314 trans = exp_translator(source[start:end].rstrip(';')) 315 if not trans: 316 raise SyntaxError('Invalid throw statement: nothing to throw') 317 res = 'PyJsTempException = JsToPyException(%s)\nraise PyJsTempException\n' % trans 318 return res, end 319 320 321def do_try(source, start): 322 start += 3 # pass try 323 block, start = do_block(source, start) 324 result = 'try:\n%s' % indent(block) 325 catch = except_keyword(source, start, 'catch') 326 if catch: 327 bra, catch = pass_bracket(source, catch, '()') 328 bra = bra[1:-1] 329 identifier, bra_end = parse_identifier(bra, 0) 330 holder = 'PyJsHolder_%s_%d' % (identifier.encode('hex'), 331 random.randrange(1e8)) 332 identifier = identifier.__repr__() 333 bra_end = pass_white(bra, bra_end) 334 if bra_end < len(bra): 335 raise SyntaxError('Invalid content of catch statement') 336 result += 'except PyJsException as PyJsTempException:\n' 337 block, catch = do_block(source, catch) 338 # fill in except ( catch ) block and remember to recover holder variable to its previous state 339 result += indent( 340 TRY_CATCH.replace('HOLDER', holder).replace('NAME', 341 identifier).replace( 342 'BLOCK', 343 indent(block))) 344 start = max(catch, start) 345 final = except_keyword(source, start, 'finally') 346 if not (final or catch): 347 raise SyntaxError( 348 'Try statement has to be followed by catch or finally') 349 if not final: 350 return result, start 351 # translate finally statement 352 block, start = do_block(source, final) 353 return result + 'finally:\n%s' % indent(block), start 354 355 356def do_debugger(source, start): 357 start += 8 # pass debugger 358 end = pass_white(source, start) 359 if end < len(source) and source[end] == ';': 360 end += 1 361 return 'pass\n', end #ignore errors... 362 363 364# todo automatic ; insertion. fuck this crappy feature 365 366# Least important 367 368 369def do_switch(source, start): 370 start += 6 # pass switch 371 code = 'while 1:\n' + indent('SWITCHED = False\nCONDITION = (%s)\n') 372 # parse value of check 373 val, start = pass_bracket(source, start, '()') 374 if val is None: 375 raise SyntaxError('Missing () after switch statement') 376 if not val.strip(): 377 raise SyntaxError('Missing content inside () after switch statement') 378 code = code % exp_translator(val) 379 bra, start = pass_bracket(source, start, '{}') 380 if bra is None: 381 raise SyntaxError('Missing block {} after switch statement') 382 bra_pos = 0 383 bra = bra[1:-1] + ';' 384 while True: 385 case = except_keyword(bra, bra_pos, 'case') 386 default = except_keyword(bra, bra_pos, 'default') 387 assert not (case and default) 388 if case or default: # this ?: expression makes things much harder.... 389 case_code = None 390 if case: 391 case_code = 'if SWITCHED or PyJsStrictEq(CONDITION, %s):\n' 392 # we are looking for a first : with count 1. ? gives -1 and : gives +1. 393 count = 0 394 for pos, e in enumerate(bra[case:], case): 395 if e == '?': 396 count -= 1 397 elif e == ':': 398 count += 1 399 if count == 1: 400 break 401 else: 402 raise SyntaxError( 403 'Missing : token after case in switch statement') 404 case_condition = exp_translator( 405 bra[case:pos]) # switch {case CONDITION: statements} 406 case_code = case_code % case_condition 407 case = pos + 1 408 if default: 409 case = except_token(bra, default, ':') 410 case_code = 'if True:\n' 411 # now parse case statements (things after ':' ) 412 cand, case = do_statement(bra, case) 413 while cand: 414 case_code += indent(cand) 415 cand, case = do_statement(bra, case) 416 case_code += indent('SWITCHED = True\n') 417 code += indent(case_code) 418 bra_pos = case 419 else: 420 break 421 # prevent infinite loop :) 422 code += indent('break\n') 423 return code, start 424 425 426def do_pyimport(source, start): 427 start += 8 428 lib, start = parse_identifier(source, start) 429 jlib = 'PyImport_%s' % lib 430 code = 'import %s as %s\n' % (lib, jlib) 431 #check whether valid lib name... 432 try: 433 compile(code, '', 'exec') 434 except: 435 raise SyntaxError( 436 'Invalid Python module name (%s) in pyimport statement' % lib) 437 # var.pyimport will handle module conversion to PyJs object 438 code += 'var.pyimport(%s, %s)\n' % (repr(lib), jlib) 439 return code, start 440 441 442def do_with(source, start): 443 raise NotImplementedError('With statement is not implemented yet :(') 444 445 446KEYWORD_METHODS = { 447 'do': do_dowhile, 448 'while': do_while, 449 'if': do_if, 450 'throw': do_throw, 451 'return': do_return, 452 'continue': do_continue, 453 'break': do_break, 454 'try': do_try, 455 'for': do_for, 456 'switch': do_switch, 457 'var': do_var, 458 'debugger': do_debugger, # this one does not do anything 459 'with': do_with, 460 'pyimport': do_pyimport 461} 462 463#Also not specific statements (harder to detect) 464# Block {} 465# Expression or Empty Statement 466# Label 467# 468# Its easy to recognize block but harder to distinguish between label and expression statement 469 470 471def translate_flow(source): 472 """Source cant have arrays, object, constant or function literals. 473 Returns PySource and variables to register""" 474 global TO_REGISTER 475 TO_REGISTER = [] 476 return do_block('{%s}' % source, 0)[0], TO_REGISTER 477 478 479if __name__ == '__main__': 480 #print do_dowhile('do {} while(k+f)', 0)[0] 481 #print 'e: "%s"'%do_expression('++(c?g:h); mj', 0)[0] 482 print(translate_flow('a; yimport test')[0]) 483