#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# LameJs - A very basic javascript interpreter in Python
# This file is part of the Wapiti project (http://wapiti.sourceforge.io)
# Copyright (C) 2013-2020 Nicolas Surribas
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
import logging
import re

from wapitiCore.net.jsparser import jsparser3


class LameJs:
    """Walk the syntax tree of a JavaScript snippet and collect variables and links.

    The script is parsed with ``jsparser3`` and the resulting tree is visited once by
    :meth:`read_node`. Variables declared with an initializer are stored in ``js_vars``;
    values passed to ``window.open(...)`` / ``*.asyncRequest(...)`` or assigned to
    ``*.href``, ``*.action``, ``*.location`` and ``*.src`` are stored in ``links``.

    Processing is best-effort: any error raised while parsing or walking the tree stops
    the interpretation, and whatever was collected up to that point is kept.
    """

    # Attribute suffixes whose assigned value is recorded as a link.
    _LINK_ATTRIBUTES = (".href", ".action", ".location", ".src")

    def __init__(self, data):
        """Parse ``data`` (JavaScript source as a string) and interpret it immediately."""
        self.js_vars = {}
        self.links = []
        self.debug = False
        # https://stackoverflow.com/questions/5780047/html-comments-in-a-javascript-block
        # trick used by http://php.testsparker.com/
        # Lines starting with "<!--" or "--" are turned into JavaScript line comments.
        data = re.sub(r"(?m)^[^\S\n]*<!--", "//", data)
        data = re.sub(r"(?m)^[^\S\n]*--", "//", data)
        try:
            rootnode = jsparser3.parse(data, None, 0)
            self.read_node(rootnode)
        except Exception as exception:  # deliberate best-effort: never fail on broken scripts
            logging.debug("LameJs stopped interpreting the script: %s", exception)

    def get_vars(self):
        """Return the dictionary mapping variable names to their interpreted values."""
        return self.js_vars

    def get_links(self):
        """Return the list of link values collected while walking the script."""
        return self.links

    def read_node(self, node):
        """Interpret ``node`` (and its children) and return its value when it has one.

        Node types that only have side effects (SCRIPT, VAR, FUNCTION, SEMICOLON, CALL,
        ASSIGN, WITH, OBJECT_INIT) and unknown node types return ``None``.
        """
        node_type = node.type

        if node_type == "SCRIPT":
            logging.debug("# SCRIPT")
            for sub_node in node:
                self.read_node(sub_node)
        elif node_type == "VAR":
            logging.debug("# VAR IN")
            # A single statement may declare several variables: var a = 1, b = 2;
            for sub_node in node:
                logging.debug("# VAR OUT %s", self.read_node(sub_node))
        elif node_type == "IDENTIFIER":
            logging.debug("# IDENTIFIER")
            if hasattr(node, 'initializer'):
                # Declaration with a value: remember it and return the (name, value) pair
                value = self.read_node(node.initializer)
                self.js_vars[node.value] = value
                return node.value, value
            # Plain reference: return the known value of the variable (None if unknown)
            return self.js_vars.get(node.value)
        elif node_type == "NUMBER":
            logging.debug("# NUMBER")
            return node.value
        elif node_type == "STRING":
            logging.debug("# STRING")
            return node.value
        elif node_type == "PLUS":
            logging.debug("# PLUS")
            return self._read_plus(node)
        elif node_type == "FUNCTION":
            logging.debug("# FUNCTION")
            func_name = getattr(node, "name", "anonymous")
            logging.debug("In function %s", func_name)
            self.read_node(node.body)
        elif node_type == "SEMICOLON":
            logging.debug("# SEMICOLON")
            self.read_node(node.expression)
            logging.debug("Semicolon end")
        elif node_type == "CALL":
            logging.debug("# CALL")
            self._read_call(node)
        elif node_type == "DOT":
            logging.debug("# DOT")
            return ".".join([sub_node.value for sub_node in node])
        elif node_type == "LIST":
            logging.debug("# LIST")
            values = [self.read_node(sub_node) for sub_node in node]
            logging.debug("list = %s", values)
            return values
        elif node_type == "ASSIGN":
            logging.debug("# ASSIGN")
            self._read_assign(node)
        elif node_type == "WITH":
            logging.debug("# WITH")
            for sub_node in node.body:
                self.read_node(sub_node)
        elif node_type == "PROPERTY_INIT":
            logging.debug("# PROPERTY_INIT")
            attrib_name = self.read_node(node[0])
            attrib_value = self.read_node(node[1])
            logging.debug("attrib_name = %s", attrib_name)
            logging.debug("attrib_value = %s", attrib_value)
            return attrib_name
        elif node_type == "OBJECT_INIT":
            logging.debug("# OBJECT_INIT")
            for sub_node in node:
                self.read_node(sub_node)
            logging.debug("OBJECT_INIT end")
        elif node_type == "REGEXP":
            # The node *type* must be compared here, not the node object itself
            logging.debug("# REGEXP")
            return node.value
        elif node_type == "THIS":
            logging.debug("# THIS")
            return "this"
        else:
            logging.debug("? %s", node_type)

        return None

    def _read_plus(self, node):
        """Evaluate a ``+`` expression made of strings and integers, JavaScript style."""
        # If some items of the concatenation include function calls or access parts of an array,
        # stop here to prevent false positives
        if any(sub_node.type in ("CALL", "INDEX") for sub_node in node):
            return None

        eax = None
        for sub_node in node:
            value = self.read_node(sub_node)
            if eax is None:
                eax = value
            elif isinstance(eax, str):
                if isinstance(value, str):
                    eax += value
                elif isinstance(value, int):
                    eax += str(value)
            elif isinstance(eax, int):
                if isinstance(value, str):
                    eax = str(eax) + value
                elif isinstance(value, int):
                    eax += value

        return eax

    def _read_call(self, node):
        """Record the URL argument of ``window.open(url, ...)`` and ``*.asyncRequest(method, url, ...)``."""
        func_name = self.read_node(node[0])
        if not func_name:
            func_name = "anonymous"
        params = self.read_node(node[1])
        logging.debug("func_name = %s", func_name)
        logging.debug("params = %s", params)

        # The callee or the arguments may have been evaluated to something unexpected (number, tuple, None...).
        # Ignore such calls instead of raising, which would abort the processing of the whole script.
        if not isinstance(func_name, str) or not isinstance(params, list):
            return

        if func_name == "window.open":
            if params and params[0]:
                self.links.append(params[0])
        elif func_name.endswith(".asyncRequest"):
            if len(params) > 1 and isinstance(params[0], str) and params[0].upper() in ("GET", "POST"):
                # Skip URLs that could not be evaluated, like the window.open branch does
                if params[1]:
                    self.links.append(params[1])

    def _read_assign(self, node):
        """Record the value assigned to an attribute ending with href, action, location or src."""
        left_value = self.read_node(node[0])
        if node[1].type == "DOT":
            # Seems too complicated to process objects attributes...
            return

        right_value = self.read_node(node[1])
        logging.debug("left_value = %s", left_value)
        logging.debug("right_value = %s", right_value)

        # left_value is only a string for dotted names (a bare unknown identifier gives None):
        # anything else cannot be a link attribute and must not raise.
        if not right_value or not isinstance(left_value, str):
            return

        if left_value.endswith(self._LINK_ATTRIBUTES):
            if node[1].type == "IDENTIFIER" and self.js_vars.get(right_value):
                self.links.append(self.js_vars[right_value])
            else:
                self.links.append(right_value)


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)

    data3 = """
    function yolo() {
      u='http://www.website.com/page.php?uid=1';
      t='Hi there';
      window.open('http://www.facebook.com/sharer.php?u='+encodeURIComponent(u)+'&t='+encodeURIComponent(t),'sharer','toolbar=0,status=0,width=626,height=436');
      return false;
    }"""

    lame_js = LameJs(data3)
    print(lame_js.get_links())