1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3# LameJs - A very basic javascript interpreter in Python
4# This file is part of the Wapiti project (http://wapiti.sourceforge.io)
5# Copyright (C) 2013-2020 Nicolas Surribas
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program; if not, write to the Free Software
19# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20import logging
21import re
22
23from wapitiCore.net.jsparser import jsparser3
24
25
class LameJs:
    """Very basic JavaScript "interpreter" that harvests links from a script.

    The script is parsed with ``jsparser3`` and the resulting tree is walked
    by :meth:`read_node`. Initialised variables are stored in ``js_vars`` and
    values passed to ``window.open`` / ``*.asyncRequest`` or assigned to
    ``.href`` / ``.action`` / ``.location`` / ``.src`` are stored in ``links``.
    """

    # Suffixes of assignment targets whose assigned value is kept as a link.
    _LINK_ATTRIBUTES = (".href", ".action", ".location", ".src")

    def __init__(self, data):
        """Parse and interpret the JavaScript source given in ``data``.

        Interpretation is best effort: any exception raised while parsing or
        walking the tree is logged at debug level and whatever was collected
        before the failure stays available through the getters.
        """
        self.js_vars = {}
        self.links = []
        self.debug = False
        # https://stackoverflow.com/questions/5780047/html-comments-in-a-javascript-block
        # trick used by http://php.testsparker.com/
        data = re.sub(r"(?m)^[^\S\n]*<!--", "//", data)
        data = re.sub(r"(?m)^[^\S\n]*--", "//", data)
        try:
            rootnode = jsparser3.parse(data, None, 0)
            self.read_node(rootnode)
        except Exception as exception:  # pylint: disable=broad-except
            # Deliberately broad: a script we cannot understand must never
            # break the caller, but leave a trace for debugging.
            logging.debug("LameJs: interpretation aborted: %r", exception)

    def get_vars(self):
        """Return the dict mapping variable names to their evaluated value."""
        return self.js_vars

    def get_links(self):
        """Return the list of links collected while reading the script."""
        return self.links

    def read_node(self, node):
        """Recursively evaluate ``node`` and return its value when it has one.

        Returns:
            - NUMBER / STRING / REGEXP: the node value.
            - THIS: the string ``"this"``.
            - IDENTIFIER: ``(name, value)`` when the node has an initializer,
              otherwise the value previously stored for that name (or None).
            - PLUS: the concatenation / sum of the operands, or None when an
              operand is a function call or an index access.
            - DOT: the sub-node values joined with dots.
            - LIST: the list of evaluated items.
            - PROPERTY_INIT: the evaluated property name.
            - every other node type: None.
        """
        if node is None:
            # A parent may have no child to evaluate (SEMICOLON passes its
            # ``expression`` straight through; presumably an empty statement
            # leaves it unset - TODO confirm against the parser).
            return None

        node_type = node.type
        if node_type == "SCRIPT":
            logging.debug("# SCRIPT")
            for sub_node in node:
                self.read_node(sub_node)
        elif node_type == "VAR":
            logging.debug("# VAR IN")
            # Evaluate every declaration, not only the first one, so that
            # "var a = 1, b = 2" registers both variables.
            for sub_node in node:
                logging.debug("# VAR OUT %s", self.read_node(sub_node))
        elif node_type == "IDENTIFIER":
            logging.debug("# IDENTIFIER")
            if hasattr(node, "initializer"):
                value = self.read_node(node.initializer)
                self.js_vars[node.value] = value
                return node.value, value
            return self.js_vars.get(node.value)
        elif node_type == "NUMBER":
            logging.debug("# NUMBER")
            return node.value
        elif node_type == "STRING":
            logging.debug("# STRING")
            return node.value
        elif node_type == "PLUS":
            logging.debug("# PLUS")
            return self._read_plus(node)
        elif node_type == "FUNCTION":
            logging.debug("# FUNCTION")
            logging.debug("In function %s", getattr(node, "name", "anonymous"))
            self.read_node(node.body)
        elif node_type == "SEMICOLON":
            logging.debug("# SEMICOLON")
            self.read_node(node.expression)
            logging.debug("Semicolon end")
        elif node_type == "CALL":
            logging.debug("# CALL")
            self._read_call(node)
        elif node_type == "DOT":
            logging.debug("# DOT")
            return ".".join(sub_node.value for sub_node in node)
        elif node_type == "LIST":
            logging.debug("# LIST")
            items = [self.read_node(sub_node) for sub_node in node]
            logging.debug("list = %s", items)
            return items
        elif node_type == "ASSIGN":
            logging.debug("# ASSIGN")
            self._read_assign(node)
        elif node_type == "WITH":
            logging.debug("# WITH")
            for sub_node in node.body:
                self.read_node(sub_node)
        elif node_type == "PROPERTY_INIT":
            logging.debug("# PROPERTY_INIT")
            attrib_name = self.read_node(node[0])
            attrib_value = self.read_node(node[1])
            logging.debug("attrib_name = %s", attrib_name)
            logging.debug("attrib_value = %s", attrib_value)
            return attrib_name
        elif node_type == "OBJECT_INIT":
            logging.debug("# OBJECT_INIT")
            for sub_node in node:
                self.read_node(sub_node)
            logging.debug("OBJECT_INIT end")
        elif node_type == "REGEXP":
            logging.debug("# REGEXP")
            return node.value
        elif node_type == "THIS":
            logging.debug("# THIS")
            return "this"
        else:
            logging.debug("? %s", node_type)
        return None

    def _read_plus(self, node):
        """Evaluate a PLUS node by concatenating / adding its operands."""
        # If some items of the concatenation include function calls or access
        # parts of an array, stop here to prevent false positives.
        if any(sub_node.type in ("CALL", "INDEX") for sub_node in node):
            return None

        eax = None
        for sub_node in node:
            value = self.read_node(sub_node)
            if eax is None:
                eax = value
            elif isinstance(eax, str):
                if isinstance(value, str):
                    eax += value
                elif isinstance(value, int):
                    eax += str(value)
            elif isinstance(eax, int):
                if isinstance(value, str):
                    eax = str(eax) + value
                elif isinstance(value, int):
                    eax += value
        return eax

    def _read_call(self, node):
        """Record the URL passed to ``window.open`` or ``*.asyncRequest``."""
        func_name = self.read_node(node[0])
        # The callee may evaluate to None or to a non-string variable value;
        # neither can name one of the functions of interest.
        if not func_name or not isinstance(func_name, str):
            func_name = "anonymous"
        params = self.read_node(node[1])
        if not isinstance(params, list):
            params = []
        logging.debug("func_name = %s", func_name)
        logging.debug("params = %s", params)

        if func_name == "window.open":
            if params and params[0]:
                self.links.append(params[0])
        elif func_name.endswith(".asyncRequest"):
            # Expected call shape: asyncRequest(method, url, ...). Unresolved
            # arguments evaluate to None and are skipped.
            if len(params) > 1 and isinstance(params[0], str):
                if params[0].upper() in ("GET", "POST") and params[1]:
                    self.links.append(params[1])

    def _read_assign(self, node):
        """Record the value assigned to a link-like attribute (.href, ...)."""
        left_value = self.read_node(node[0])
        if node[1].type == "DOT":
            # Seems too complicated to process objects attributes...
            return

        right_value = self.read_node(node[1])
        logging.debug("left_value = %s", left_value)
        logging.debug("right_value = %s", right_value)
        # The target evaluates to None (or a non-string) when it is a plain,
        # unknown identifier: nothing to match against in that case.
        if not right_value or not isinstance(left_value, str):
            return
        if not left_value.endswith(self._LINK_ATTRIBUTES):
            return

        if node[1].type == "IDENTIFIER" and self.js_vars.get(right_value):
            self.links.append(self.js_vars[right_value])
        else:
            self.links.append(right_value)
176
177
if __name__ == "__main__":
    # Manual smoke test: interpret a small sample script with debug logging
    # enabled and print the links that were collected.
    logging.basicConfig(level=logging.DEBUG)

    sample_script = """
    function yolo() {
      u='http://www.website.com/page.php?uid=1';
      t='Hi there';
      window.open('http://www.facebook.com/sharer.php?u='+encodeURIComponent(u)+'&t='+encodeURIComponent(t),'sharer','toolbar=0,status=0,width=626,height=436');
      return false;
    }"""

    collected_links = LameJs(sample_script).get_links()
    print(collected_links)
191