1"""This module translates JS flow into PY flow.
2
3Translates:
4IF ELSE
5
6DO WHILE
7WHILE
8FOR 123
9FOR iter
10CONTINUE, BREAK, RETURN, LABEL, THROW, TRY, SWITCH
11"""
12from __future__ import print_function
13
14from utils import *
15from jsparser import *
16from nodevisitor import exp_translator
17import random
18
# Names of variables declared while translating (appended to by do_var and
# do_for); translate_flow rebinds this to a fresh list on every call.
TO_REGISTER = []
# Templates for the names of the exception classes generated for labelled
# `continue` / `break`; %s is filled with the hex-encoded label name.
CONTINUE_LABEL = 'JS_CONTINUE_LABEL_%s'
BREAK_LABEL = 'JS_BREAK_LABEL_%s'

# Code templates used by do_try for a catch clause. HOLDER, NAME and BLOCK are
# textual placeholders substituted with str.replace:
#   PREPARE - saves the current own value of NAME in HOLDER and binds NAME to
#             the caught exception.
#   RESTORE - puts the saved value back (or deletes NAME when there was none)
#             and removes HOLDER.
#   TRY_CATCH - PREPARE, then BLOCK inside try/finally so RESTORE always runs.
PREPARE = '''HOLDER = var.own.get(NAME)\nvar.force_own_put(NAME, PyExceptionToJs(PyJsTempException))\n'''
RESTORE = '''if HOLDER is not None:\n    var.own[NAME] = HOLDER\nelse:\n    del var.own[NAME]\ndel HOLDER\n'''
TRY_CATCH = '''%stry:\nBLOCKfinally:\n%s''' % (PREPARE, indent(RESTORE))
26
27
def _hex_label(label):
    """Return the hexadecimal encoding of `label` as a native str.

    Replaces ``label.encode('hex')``, which only exists on Python 2 (on
    Python 3 'hex' is not a text encoding and the call raises LookupError).
    The result contains only hex digits, so it is safe to embed in a
    generated Python class name.
    """
    import binascii  # local import keeps this edit self-contained

    # Text is encoded to UTF-8 first; byte strings (Python 2 str) are used as is.
    raw = label if isinstance(label, bytes) else label.encode('utf-8')
    hexed = binascii.hexlify(raw)
    # hexlify returns bytes on Python 3 and str on Python 2.
    return hexed if isinstance(hexed, str) else hexed.decode('ascii')


def get_continue_label(label):
    """Return the name of the exception class used for `continue <label>`."""
    return CONTINUE_LABEL % _hex_label(label)


def get_break_label(label):
    """Return the name of the exception class used for `break <label>`."""
    return BREAK_LABEL % _hex_label(label)
34
35
def pass_until(source, start, tokens=(';', )):
    """Return the index just past the first character of `tokens` at or after `start`.

    When no such character exists the scan stops at the end of `source`, and
    the result is still one past the stopping position (len(source) + 1 for
    an in-range `start`). Callers rely on that convention.
    """
    limit = len(source)
    pos = start
    while pos < limit:
        if source[pos] in tokens:
            break
        pos += 1
    return pos + 1
40
41
def do_bracket_exp(source, start, throw=True):
    """Parse a parenthesised expression at `start` and translate its content.

    Returns (translated_expression, position_after_bracket) on success.

    When `throw` is true a missing bracket or an empty translated expression
    raises SyntaxError. When `throw` is false the function returns
    (None, start) for a missing bracket and (translated, start) for an empty
    expression, leaving the position unchanged. Previously the missing-bracket
    case crashed while slicing the missing bracket text.
    """
    bra, cand = pass_bracket(source, start, '()')
    if not bra:
        if throw:
            raise SyntaxError('Missing bracket expression')
        return None, start
    translated = exp_translator(bra[1:-1])  # strip the surrounding ( )
    if not translated:
        if throw:
            raise SyntaxError('Empty bracket condition')
        return translated, start
    return translated, cand
50
51
def do_if(source, start):
    """Translate an `if` statement (with optional `else` / `else if`) at `start`.

    Returns (python_code, position_after_statement). Raises SyntaxError when
    the statement after the condition or after `else` is missing.
    """
    pos = start + 2  # skip the 'if' keyword
    condition, pos = do_bracket_exp(source, pos, throw=True)
    body, pos = do_statement(source, pos)
    if body is None:
        raise SyntaxError('Invalid if statement')
    pieces = ['if %s:\n' % condition, indent(body)]

    after_else = except_keyword(source, pos, 'else')
    if not after_else:
        return ''.join(pieces), pos

    pos = after_else
    # `else if` is detected before parsing so the nested `if ...` translation
    # can be turned into `elif ...` by prefixing 'el'.
    chained = bool(except_keyword(source, pos, 'if'))
    alternative, pos = do_statement(source, pos)
    if alternative is None:
        raise SyntaxError('Invalid if statement)')
    if chained:
        pieces.append('el' + alternative)
    else:
        pieces.append('else:\n' + indent(alternative))
    return ''.join(pieces), pos
74
75
def do_statement(source, start):
    """Translate the single statement that begins at or after `start`.

    Leading white space is skipped first. Returns (python_code, next_position),
    or (None, position) when the source is exhausted or the next token is a
    `case` / `default` keyword (those are handled by do_switch). The other
    do_* functions raise instead of returning None.
    """
    start = pass_white(source, start)
    # start is now the first position that is not white space
    if not start < len(source):  # finished parsing
        return None, start
    rest = source[start:]
    if any(startswith_keyword(rest, e) for e in ('case', 'default')):
        return None, start
    # Statements that are uniquely identified by their leading keyword.
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for key, meth in KEYWORD_METHODS.items():
        if rest.startswith(key):
            # the character after the keyword must be EOF or not an identifier part
            if len(key) == len(rest) or rest[len(key)] not in IDENTIFIER_PART:
                return meth(source, start)
    if rest[0] == '{':  # Block
        return do_block(source, start)
    # Now only label and expression are left
    cand = parse_identifier(source, start, False)
    if cand is not None:  # it may be a label
        label, cand_start = cand
        cand_start = pass_white(source, cand_start)
        # bounds check: an identifier may be the very last thing in source
        if cand_start < len(source) and source[cand_start] == ':':
            return do_label(source, start)
    return do_expression(source, start)
102
103
def do_while(source, start):
    """Translate a `while (cond) statement` loop starting at `start`.

    Returns (python_code, position_after_loop); raises SyntaxError when the
    loop body is missing.
    """
    # start + 5 skips the 'while' keyword
    condition, pos = do_bracket_exp(source, start + 5, throw=True)
    body, pos = do_statement(source, pos)
    if body is None:
        raise SyntaxError('Missing statement to execute in while loop!')
    header = 'while %s:\n' % condition
    return header + indent(body), pos
111
112
def do_dowhile(source, start):
    """Translate a `do statement while (cond)` loop starting at `start`.

    The result is a `while 1:` loop whose body is followed by
    `if not cond: break`. Returns (python_code, position_after_condition).
    Raises SyntaxError when the body or the `while` keyword is missing.
    """
    pos = start + 2  # skip the 'do' keyword
    body, pos = do_statement(source, pos)
    if body is None:
        raise SyntaxError('Missing statement to execute in do while loop!')
    pos = except_keyword(source, pos, 'while')
    if not pos:
        raise SyntaxError('Missing while keyword in do-while loop')
    condition, pos = do_bracket_exp(source, pos, throw=True)
    exit_check = 'if not %s:\n' % condition + indent('break\n')
    return 'while 1:\n' + indent(body + exit_check), pos
124
125
def do_block(source, start):
    """Translate a `{ ... }` block starting at `start`.

    Every statement inside the braces is translated in order and the results
    are concatenated. Returns (python_code, position_after_block). Raises
    SyntaxError when the block is missing or when part of its content could
    not be parsed.
    """
    block, after_block = pass_bracket(source, start, '{}')
    if block is None:
        raise SyntaxError('Missing block ( {code} )')
    # drop the braces; the trailing ';' terminates the last statement
    inner = block[1:-1] + ';'
    pieces = []
    pos = 0
    while pos < len(inner):
        statement, pos = do_statement(inner, pos)
        if statement is None:
            break
        pieces.append(statement)
    pos = pass_white(inner, pos)
    if pos < len(inner):
        raise SyntaxError('Block has more code that could not be parsed:\n' +
                          inner[pos:])
    return ''.join(pieces), after_block
145
146
def do_empty(source, start):
    """Translate an empty statement: emit `pass` and advance one character."""
    next_pos = start + 1
    return 'pass\n', next_pos
149
150
def do_expression(source, start):
    """Translate an expression statement (or empty statement) at `start`.

    Returns (python_code, position_after_statement).

    Automatic semicolon insertion: the text up to the next ';' is tried first.
    If it cannot be translated into compilable Python, the candidate is
    shortened to end at an earlier line terminator and tried again. When no
    earlier line terminator is left, the error from the last attempt is
    re-raised.
    """
    start = pass_white(source, start)
    end = pass_until(source, start, tokens=(';', ))
    if end == start + 1:  # empty statement
        return 'pass\n', end
    rev = False  # reversed candidate text, built lazily on the first failure
    rpos = 0  # scan position inside rev
    while True:
        try:
            code = source[start:end].rstrip(';')
            cand = exp_translator(code) + '\n', end
            compile(cand[0], '', 'exec')  # only validates the generated code
            return cand
        except Exception:
            # The retry search lives inside the handler so the bare `raise`
            # below always has an active exception (required on Python 3).
            if not rev:
                rev = source[start:end][::-1]
            lpos = rpos
            while True:
                rpos = pass_until(rev, rpos, LINE_TERMINATOR)
                if rpos >= len(rev):
                    raise
                # any() instead of filter(): a filter object is always truthy
                # on Python 3.
                if any(ch not in SPACE for ch in rev[lpos:rpos]):
                    break
            end = start + len(rev) - rpos + 1
179
180
def do_var(source, start):
    """Translate a `var` statement starting at `start`.

    Every declared name is appended to TO_REGISTER. Declarations with an
    initializer are translated as expressions; if none has one, the result is
    `pass`. Returns (python_code, position_after_statement). Raises
    SyntaxError when a name is followed by something other than '='.
    """
    # todo auto ; insertion
    first = start + 3  # skip the 'var' keyword
    end = pass_until(source, first, tokens=(';', ))
    # each declaration is a name with an optional initializer
    declarations = argsplit(source[first:end - 1])
    translated = []
    for declaration in declarations:
        name, name_end = parse_identifier(declaration, 0, True)
        TO_REGISTER.append(name)
        name_end = pass_white(declaration, name_end)
        if name_end >= len(declaration):
            continue  # bare declaration, nothing to emit
        # something follows the name, so it has to start with '='
        if declaration[name_end] != '=':
            raise SyntaxError(
                'Unexpected initializer in var statement. Expected "=", got "%s"'
                % declaration[name_end])
        translated.append(exp_translator(declaration) + '\n')
    code = ''.join(translated)
    if not code.strip():
        code = 'pass\n'
    return code, end
204
205
def do_label(source, start):
    """Translate a labelled statement (`label: statement`) at `start`.

    The statement is wrapped in try/except for a generated break-label
    exception class. When the statement is a loop, its body is also wrapped
    in try/except for a continue-label exception class. The class definitions
    are emitted before the statement.

    Returns (python_code, position_after_statement). Raises SyntaxError when
    the ':' or the labelled statement is missing.
    """
    label, end = parse_identifier(source, start)
    end = pass_white(source, end)
    # source[end] must be ':' (explicit check instead of assert, which is
    # stripped under -O and would raise IndexError at end of input)
    if end >= len(source) or source[end] != ':':
        raise SyntaxError('Missing : after label name')
    end += 1
    inside, end = do_statement(source, end)
    if inside is None:
        raise SyntaxError('Missing statement after label')
    defs = ''
    if inside.startswith(('while ', 'for ', '#for')):
        # a continue label is needed as well
        temp = inside.split('\n')
        sep = 1  # number of header lines before the loop body
        if inside.startswith('#for'):
            # do_for emits '#for JS loop', then zero or more init lines, then
            # the 'while ...:' header. Locate that header instead of assuming
            # exactly one init line (wrong for `for(;;)` and multi-var inits).
            # Falls back to the old fixed value of 3 if no header is found.
            sep = next((i for i, line in enumerate(temp)
                        if line.startswith('while ')), 2) + 1
        cont_label = get_continue_label(label)
        injected = 'try:\n' + '\n'.join(temp[sep:])
        injected += 'except %s:\n    pass\n' % cont_label
        inside = '\n'.join(temp[:sep]) + '\n' + indent(injected)
        defs += 'class %s(Exception): pass\n' % cont_label
    break_label = get_break_label(label)
    inside = 'try:\n%sexcept %s:\n    pass\n' % (indent(inside), break_label)
    defs += 'class %s(Exception): pass\n' % break_label
    return defs + inside, end
231
232
def do_for(source, start):
    """Translate a `for` loop starting at `start`.

    Two forms are handled:

    * `for (init; test; update) statement` becomes a '#for JS loop' comment,
      the init code, and a `while test:` loop with the update appended to the
      body. An empty test becomes `1`.
    * `for (x in y) statement` / `for (var x in y) statement` becomes
      `for temp in y:` with `var.put('x', temp)` first in the body. With
      `var` the name is also appended to TO_REGISTER.

    Returns (python_code, position_after_loop). Raises SyntaxError for a
    missing bracket, a missing body or a malformed header.
    """
    start += 3  # pass for
    bra, start = pass_bracket(source, start, '()')
    if bra is None:
        # previously this surfaced as a TypeError from slicing None
        raise SyntaxError('Missing () after for statement')
    inside, start = do_statement(source, start)
    if inside is None:
        raise SyntaxError('Missing statement after for')
    bra = bra[1:-1]
    if ';' in bra:
        init = argsplit(bra, ';')
        if len(init) != 3:
            raise SyntaxError('Invalid for statement')
        args = []
        for i, item in enumerate(init):
            end = pass_white(item, 0)
            if end == len(item):
                # empty part: only the test (i == 1) needs a value
                args.append('' if i != 1 else '1')
                continue
            if not i and except_keyword(item, end, 'var') is not None:
                # var statement
                args.append(do_var(item, end)[0])
                continue
            args.append(do_expression(item, end)[0])
        return '#for JS loop\n%swhile %s:\n%s%s\n' % (
            args[0], args[1].strip(), indent(inside), indent(args[2])), start
    # iteration
    end = pass_white(bra, 0)
    register = False
    if bra[end:].startswith('var '):
        end += 3
        end = pass_white(bra, end)
        register = True
    name, end = parse_identifier(bra, end)
    if register:
        TO_REGISTER.append(name)
    end = pass_white(bra, end)
    after_in = end + 2
    # 'in' must be a whole keyword followed by something to iterate over; the
    # length check avoids an IndexError for headers such as `for (x in)`.
    if (bra[end:after_in] != 'in' or after_in >= len(bra)
            or bra[after_in] in IDENTIFIER_PART):
        raise SyntaxError('Invalid "for x in y" statement')
    exp = exp_translator(bra[after_in:])
    res = 'for temp in %s:\n' % exp
    res += indent('var.put(%s, temp)\n' % repr(name)) + indent(inside)
    return res, start
277
278
279# todo - IMPORTANT
def do_continue(source, start, name='continue'):
    """Translate a `continue` statement (or `break`, selected via `name`).

    Without a label the keyword is emitted unchanged. With a label the result
    raises the matching generated label exception class. Returns
    (python_code, position_after_statement). Raises SyntaxError when the
    label is followed by something other than ';'.
    """
    pos = pass_white(source, start + len(name))  # skip keyword and spaces
    if pos < len(source) and source[pos] == ';':
        return '%s\n' % name, pos + 1
    # labelled statement or error
    label, pos = parse_identifier(source, pos)
    pos = pass_white(source, pos)
    if pos < len(source) and source[pos] != ';':
        raise SyntaxError('Missing ; after label name in %s statement' % name)
    if name == 'continue':
        exception_class = get_continue_label(label)
    else:
        exception_class = get_break_label(label)
    return 'raise %s("%s")\n' % (exception_class, name), pos + 1
293
294
def do_break(source, start):
    """Translate a `break` statement by delegating to do_continue."""
    return do_continue(source, start, name='break')
297
298
def do_return(source, start):
    """Translate a `return` statement starting at `start`.

    The returned expression runs up to the next ';', or to the end of
    `source` when there is none. An empty expression is translated to
    `return var.get('undefined')`. Returns (python_code, position_after).
    """
    start += 6  # pass return
    end = source.find(';', start) + 1
    # find() gives -1 when there is no ';', so end is 0 here, never -1. The
    # old `end == -1` test could not fire and left end at 0, which rewound
    # the parser. do_throw already uses this form.
    if not end:
        end = len(source)
    trans = exp_translator(source[start:end].rstrip(';'))
    return 'return %s\n' % (trans if trans else "var.get('undefined')"), end
306
307
308# todo later?- Also important
def do_throw(source, start):
    """Translate a `throw` statement starting at `start`.

    The thrown expression runs up to the next ';' (or to the end of `source`).
    Returns (python_code, position_after_statement); raises SyntaxError when
    there is nothing to throw.
    """
    begin = start + 5  # skip the 'throw' keyword
    semicolon = source.find(';', begin)
    end = len(source) if semicolon == -1 else semicolon + 1
    thrown = exp_translator(source[begin:end].rstrip(';'))
    if not thrown:
        raise SyntaxError('Invalid throw statement: nothing to throw')
    template = 'PyJsTempException = JsToPyException(%s)\nraise PyJsTempException\n'
    return template % thrown, end
319
320
def do_try(source, start):
    """Translate a `try` statement with optional `catch` and `finally`.

    The catch block is emitted through the TRY_CATCH template: the caught
    exception is bound to the catch identifier for the duration of the block
    and the identifier's previous value is restored afterwards, using a
    uniquely named holder variable.

    Returns (python_code, position_after_statement). Raises SyntaxError when
    neither `catch` nor `finally` follows the try block, or when the catch
    header is malformed.
    """
    import binascii  # local import keeps this edit self-contained

    start += 3  # pass try
    block, start = do_block(source, start)
    result = 'try:\n%s' % indent(block)
    catch = except_keyword(source, start, 'catch')
    if catch:
        bra, catch = pass_bracket(source, catch, '()')
        if bra is None:
            raise SyntaxError('Missing () after catch')
        bra = bra[1:-1]
        identifier, bra_end = parse_identifier(bra, 0)
        # Hex-encode the identifier for the holder name. str.encode('hex')
        # only exists on Python 2, so hexlify is used on both versions.
        raw = identifier if isinstance(identifier,
                                       bytes) else identifier.encode('utf-8')
        hexed = binascii.hexlify(raw)
        if not isinstance(hexed, str):
            hexed = hexed.decode('ascii')
        # randrange needs an int: a float argument is deprecated since
        # Python 3.10 and a TypeError from 3.12.
        holder = 'PyJsHolder_%s_%d' % (hexed, random.randrange(10**8))
        identifier = repr(identifier)
        bra_end = pass_white(bra, bra_end)
        if bra_end < len(bra):
            raise SyntaxError('Invalid content of catch statement')
        result += 'except PyJsException as PyJsTempException:\n'
        block, catch = do_block(source, catch)
        # fill in the except (catch) block; the template restores the holder
        # variable to its previous state
        result += indent(
            TRY_CATCH.replace('HOLDER', holder).replace(
                'NAME', identifier).replace('BLOCK', indent(block)))
        # Only advance when a catch clause exists: max(None, start) is a
        # TypeError on Python 3.
        start = max(catch, start)
    final = except_keyword(source, start, 'finally')
    if not (final or catch):
        raise SyntaxError(
            'Try statement has to be followed by catch or finally')
    if not final:
        return result, start
    # translate finally statement
    block, start = do_block(source, final)
    return result + 'finally:\n%s' % indent(block), start
354
355
def do_debugger(source, start):
    """Translate a `debugger` statement into `pass`.

    An optional ';' after the keyword is consumed. Returns
    ('pass\\n', position_after_statement); anything else is left for the
    next statement.
    """
    pos = pass_white(source, start + 8)  # skip the 'debugger' keyword
    if pos < len(source) and source[pos] == ';':
        pos += 1
    return 'pass\n', pos  # ignore errors...
362
363
# TODO: automatic semicolon insertion (ASI) is not handled for every statement type.

# Least important
367
368
def do_switch(source, start):
    """Translate a `switch` statement starting at `start`.

    The generated code is a `while 1:` loop that ends with `break`, so it
    runs at most once. The switch value is stored in CONDITION. Every
    `case x:` becomes `if SWITCHED or PyJsStrictEq(CONDITION, x):` and
    `default:` becomes `if True:`. Each clause body ends with
    `SWITCHED = True`, so every following clause is entered as well
    (fall-through).

    Returns (python_code, position_after_switch_block). Raises SyntaxError
    when the `()` or `{}` part is missing or a `case` has no ':'.
    """
    start += 6  # pass switch
    code = 'while 1:\n' + indent('SWITCHED = False\nCONDITION = (%s)\n')
    # parse value of check
    val, start = pass_bracket(source, start, '()')
    if val is None:
        raise SyntaxError('Missing () after switch statement')
    if not val.strip():
        raise SyntaxError('Missing content inside () after switch statement')
    code = code % exp_translator(val)
    bra, start = pass_bracket(source, start, '{}')
    if bra is None:
        raise SyntaxError('Missing block {} after switch statement')
    bra_pos = 0
    # drop the braces; the trailing ';' terminates the last statement
    bra = bra[1:-1] + ';'
    while True:
        # position after the keyword if the next clause is case/default
        case = except_keyword(bra, bra_pos, 'case')
        default = except_keyword(bra, bra_pos, 'default')
        assert not (case and default)
        if case or default:  # this ?: expression makes things much harder....
            case_code = None
            if case:
                case_code = 'if SWITCHED or PyJsStrictEq(CONDITION, %s):\n'
                # we are looking for a first : with count 1. ? gives -1 and : gives +1.
                # (a ':' that belongs to a ?: expression only brings count back to 0)
                count = 0
                for pos, e in enumerate(bra[case:], case):
                    if e == '?':
                        count -= 1
                    elif e == ':':
                        count += 1
                        if count == 1:
                            break
                else:
                    raise SyntaxError(
                        'Missing : token after case in switch statement')
                case_condition = exp_translator(
                    bra[case:pos])  # switch {case CONDITION: statements}
                case_code = case_code % case_condition
                case = pos + 1
            if default:
                case = except_token(bra, default, ':')
                case_code = 'if True:\n'
            # now parse case statements (things after ':' )
            # do_statement returns None at the next case/default or at the end
            cand, case = do_statement(bra, case)
            while cand:
                case_code += indent(cand)
                cand, case = do_statement(bra, case)
            case_code += indent('SWITCHED = True\n')
            code += indent(case_code)
            bra_pos = case
        else:
            break
    # prevent infinite loop :)
    code += indent('break\n')
    return code, start
424
425
def do_pyimport(source, start):
    """Translate a `pyimport name` statement starting at `start`.

    Emits `import name as PyImport_name` followed by a `var.pyimport(...)`
    call. Returns (python_code, position_after_module_name). Raises
    SyntaxError when the generated import line does not compile.
    """
    start += 8  # pass pyimport
    lib, start = parse_identifier(source, start)
    jlib = 'PyImport_%s' % lib
    code = 'import %s as %s\n' % (lib, jlib)
    # check whether valid lib name...
    try:
        compile(code, '', 'exec')
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not turned into a SyntaxError.
        raise SyntaxError(
            'Invalid Python module name (%s) in pyimport statement' % lib)
    # var.pyimport will handle module conversion to PyJs object
    code += 'var.pyimport(%s, %s)\n' % (repr(lib), jlib)
    return code, start
440
441
def do_with(source, start):
    """Reject the `with` statement: it is not supported by this translator."""
    message = 'With statement is not implemented yet :('
    raise NotImplementedError(message)
444
445
# Dispatch table used by do_statement: maps a JS keyword that uniquely
# identifies a statement type to the function that translates it. Each
# function takes (source, start) with start at the keyword.
KEYWORD_METHODS = {
    'do': do_dowhile,
    'while': do_while,
    'if': do_if,
    'throw': do_throw,
    'return': do_return,
    'continue': do_continue,
    'break': do_break,
    'try': do_try,
    'for': do_for,
    'switch': do_switch,
    'var': do_var,
    'debugger': do_debugger,  # this one does not do anything
    'with': do_with,
    'pyimport': do_pyimport
}
462
# Statements that are not introduced by a keyword (harder to detect):
# Block {}
# Expression or Empty Statement
# Label
#
# It is easy to recognize a block, but harder to distinguish between a label and an expression statement.
469
470
def translate_flow(source):
    """Translate JS control flow in `source` into Python source.

    Source can't have array, object, constant or function literals.
    Returns (python_source, variables_to_register); the register list is
    reset at the start of every call.
    """
    global TO_REGISTER
    TO_REGISTER = []
    wrapped = '{%s}' % source  # parse the whole program as one block
    py_source, _ = do_block(wrapped, 0)
    return py_source, TO_REGISTER
477
478
if __name__ == '__main__':
    # Ad-hoc manual check: translate a small snippet and print the generated
    # Python source. The commented lines are older Python 2 experiments.
    #print do_dowhile('do {} while(k+f)', 0)[0]
    #print 'e: "%s"'%do_expression('++(c?g:h);   mj', 0)[0]
    print(translate_flow('a; yimport test')[0])
483