1from wfuzz.exception import (
2    FuzzExceptMissingAPIKey,
3    FuzzExceptResourceParseError,
4    FuzzExceptPluginLoadError,
5)
6from wfuzz.facade import Facade
7from wfuzz.helpers.utils import MyCounter
8
9
10# Python 2 and 3: alternative 4
11try:
12    from urllib.request import Request
13    from urllib.request import build_opener
14except ImportError:
15    from urllib2 import Request
16    from urllib2 import build_opener
17
import base64
import json
19
20# python 2 and 3: iterator
21from builtins import object
22from threading import Thread
23from queue import Queue
24
25IMPORTED_SHODAN = True
26try:
27    import shodan
28except ImportError:
29    IMPORTED_SHODAN = False
30
# Sample Shodan API search response (two matches), apparently kept as test /
# reference data; it mirrors the dicts yielded by ShodanIter below.
# NOTE(review): the original text was mojibake-damaged in a few places
# ("html_$ ash", "$ omains", "$ helmsford", "Authoriza$ ion") — restored here.
# The digest nonce in the second match still contains one unrecoverable
# garbled character ("74$5"); left as captured.
m = {
    "matches": [
        {
            "_shodan": {
                "id": "54e0ae62-9e22-404b-91b4-92f99e89c987",
                "options": {},
                "ptr": True,
                "module": "auto",
                "crawler": "62861a86c4e4b71dceed5113ce9593b98431f89a",
            },
            "hash": -1355923443,
            "os": None,
            "ip": 1240853908,
            "isp": "Comcast Cable",
            "http": {
                "html_hash": -2142469325,
                "robots_hash": None,
                "redirects": [],
                "securitytxt": None,
                "title": "400 Bad Request",
                "sitemap_hash": None,
                "robots": None,
                "favicon": None,
                "host": "73.245.237.148",
                "html": '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">\n<html><head>\n<title>400 Bad Request</title>\n</head><body>\n<h1>Bad Request</h1>\n<p>Your browser sent a request that this server could not understand.<br />\nReason: You\'re speaking plain HTTP to an SSL-enabled server port.<br />\n Instead use the HTTPS scheme to access this URL, please.<br />\n</p>\n<p>Additionally, a 404 Not Found\nerror was encountered while trying to use an ErrorDocument to handle the request.</p>\n</body></html>\n',
                "location": "/",
                "components": {},
                "server": "Apache",
                "sitemap": None,
                "securitytxt_hash": None,
            },
            "port": 9445,
            "hostnames": ["c-73-245-237-148.hsd1.fl.comcast.net"],
            "location": {
                "city": "Fort Lauderdale",
                "region_code": "FL",
                "area_code": 954,
                "longitude": -80.3704,
                "country_code3": "USA",
                "country_name": "United States",
                "postal_code": "33331",
                "dma_code": 528,
                "country_code": "US",
                "latitude": 26.065200000000004,
            },
            "timestamp": "2019-04-10T10:30:48.297701",
            "domains": ["comcast.net"],
            "org": "Comcast Cable",
            "data": "HTTP/1.1 400 Bad Request\r\nDate: Wed, 10 Apr 2019 10:19:07 GMT\r\nServer: Apache\r\nContent-Length: 481\r\nConnection: close\r\nContent-Type: text/html; charset=iso-8859-1\r\n\r\n",
            "asn": "AS7922",
            "transport": "tcp",
            "ip_str": "73.245.237.148",
        },
        {
            "_shodan": {
                "id": "4ace6fd1-8295-4aea-a086-2280598ca9e7",
                "options": {},
                "ptr": True,
                "module": "auto",
                "crawler": "62861a86c4e4b71dceed5113ce9593b98431f89a",
            },
            "product": "Apache httpd",
            "hash": 370611044,
            "os": None,
            "ip": 35226500,
            "isp": "EE High Speed Internet",
            "http": {
                "html_hash": -163723763,
                "robots_hash": None,
                "redirects": [],
                "securitytxt": None,
                "title": "401 Authorization Required",
                "sitemap_hash": None,
                "robots": None,
                "favicon": None,
                "host": "2.25.131.132",
                "html": "<HEAD><TITLE>401 Authorization Required</TITLE></HEAD>\n<BODY><H1>401 Authorization Required</H1>\nBrowser not authentication-capable or authentication failed.\n</BODY>\n",
                "location": "/",
                "components": {},
                "server": "Apache",
                "sitemap": None,
                "securitytxt_hash": None,
            },
            "cpe": ["cpe:/a:apache:http_server"],
            "port": 8085,
            "hostnames": [],
            "location": {
                "city": "Chelmsford",
                "region_code": "E4",
                "area_code": None,
                "longitude": 0.48330000000001405,
                "country_code3": "GBR",
                "country_name": "United Kingdom",
                "postal_code": "CM2",
                "dma_code": None,
                "country_code": "GB",
                "latitude": 51.733300000000014,
            },
            "timestamp": "2019-04-10T11:03:59.955967",
            "domains": [],
            "org": "EE High Speed Internet",
            "data": 'HTTP/1.1 401 Unauthorized\r\nServer: Apache\r\nConnection: Close\r\nContent-type: text/html\r\nWWW-Authenticate: Digest realm="DSLForum CPE Management", algorithm=MD5, qop=auth, stale=FALSE, nonce="3d7a3f71e72e095dba31fd77d4db74$5", opaque="5ccc069c403ebaf9f0171e9517f40e41"\r\n\r\n',
            "asn": "AS12576",
            "transport": "tcp",
            "ip_str": "2.25.131.132",
        },
    ]
}
139
140
class BingIter(object):
    """Iterate over the URLs returned by a Bing (Azure Datamarket) search.

    Results are fetched lazily in pages of up to 50 entries; ``max_count``
    holds the (estimated) number of results that will be yielded once the
    requested offset and limit have been applied.
    """

    def __init__(self, dork, offset=0, limit=0, key=None):
        """
        dork   -- search expression sent to the Bing API.
        offset -- number of initial results to skip.
        limit  -- maximum number of results to yield (0 means no limit).
        key    -- Bing API key; when None it is read from the "plugins"
                  section ("bing_apikey") of wfuzz.ini.

        Raises FuzzExceptMissingAPIKey when no key is available.
        """
        if key is None:
            key = Facade().sett.get("plugins", "bing_apikey")

        if not key:
            raise FuzzExceptMissingAPIKey(
                "An api Bing key is needed. Please check wfuzz.ini."
            )

        self._key = key
        self._dork = dork

        self.max_count = 0
        self.current = 0
        self._index = 0
        self._retrieved = 0
        self._results = []

        # First bing request to get the estimated total count (the estimate
        # does not take the offset into consideration).
        if limit > 0 and limit < 50:
            total_results, self._retrieved, self._results = self._do_search(
                offset, limit
            )
        else:
            total_results, self._retrieved, self._results = self._do_search(offset)

        # Clamp the offset so it never points past the available results.
        if offset > total_results:
            self._offset = total_results
        else:
            self._offset = offset

        self.max_count = total_results - self._offset

        # Never yield more than `limit` results.
        if self.max_count > limit and limit > 0:
            self.max_count = limit

    def _do_search(self, offset=0, limit=50):
        """Perform one Bing API request.

        Returns a (estimated_total, n_retrieved, results) tuple, where
        results is the list of raw result dicts for this page (empty when
        the API reports no data).

        Raises FuzzExceptResourceParseError when the HTTP request fails.
        """
        # some code taken from http://www.securitybydefault.com/2014/07/search2auditpy-deja-que-bing-haga-el.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+SecurityByDefault+%28Security+By+Default%29
        # api doc http://go.microsoft.com/fwlink/?LinkID=248077
        user_agent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; FDM; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 1.1.4322)"
        # HTTP Basic auth with an empty user name and the API key as password.
        # FIX: the original used str.encode("base64"), a Python-2-only codec
        # that raises LookupError on Python 3; base64.b64encode is portable
        # and, unlike the old codec, appends no trailing newline to strip.
        creds = base64.b64encode((":%s" % self._key).encode("utf-8")).decode("ascii")
        auth = "Basic %s" % creds

        result = None

        try:
            urlstr = (
                "https://api.datamarket.azure.com/Data.ashx/Bing/Search/Composite?Sources=%27web%27&Query=%27"
                + self._dork
                + "%27&$format=json"
            )
            if limit != 50:
                urlstr += "&$top=%d" % limit
            if offset != 0:
                urlstr += "&$skip=%d" % offset

            request = Request(urlstr)

            request.add_header("Authorization", auth)
            request.add_header("User-Agent", user_agent)
            requestor = build_opener()
            result = requestor.open(request)
        except Exception as e:
            raise FuzzExceptResourceParseError(
                "Error when retrieving Bing API results: %s." % str(e)
            )

        results = json.loads(result.read())

        # WebTotal is not reliable, it is usually much bigger than the actual results, therefore
        # if your offset increases over the real number of results, you get a dict
        # without values and counters to ''. It gets updated when you are close to that limit though.
        if results["d"]["results"][0]["WebTotal"]:
            res_total = int(results["d"]["results"][0]["WebTotal"])
            res_list = results["d"]["results"][0]["Web"]

            return res_total, len(res_list), res_list
        else:
            # FIX: return an empty list (not 0) so callers can always treat
            # the third element as a sequence of results.
            return 0, 0, []

    def __iter__(self):
        return self

    def __next__(self):
        if self.current >= self.max_count:
            raise StopIteration

        # Result buffer already consumed: fetch the next page.
        if self._index >= self._retrieved:
            realcount, self._retrieved, self._results = self._do_search(
                self.current + self._offset
            )

            self._index = 0

            # Update the (estimated) real count with fresher information.
            if self.max_count > realcount:
                self.max_count = realcount

        elem = self._results[self._index]["Url"].strip()

        self.current += 1
        self._index += 1

        # pycurl does not like unicode
        if isinstance(elem, str):
            return elem.encode("utf-8")
        else:
            return elem
253
254
class ShodanIter:
    """Iterate over Shodan search results for a given dork.

    Background worker threads pull page numbers from ``page_queue``, query
    the Shodan API and push the individual match dicts onto
    ``results_queue``; ``__next__`` consumes that queue.  ``None`` on the
    results queue marks exhaustion, and an Exception instance placed there
    is re-raised to the consumer.
    """

    # Shodan returns up to 100 matches per result page.
    SHODAN_RES_PER_PAGE = 100
    # Bound the buffered results so workers do not run unboundedly ahead.
    MAX_ENQUEUED_RES = SHODAN_RES_PER_PAGE + 1
    NUM_OF_WORKERS = 1
    # When False, one extra page per additional worker is queued up front.
    SLOW_START = True

    def __init__(self, dork, page, limit):
        """
        dork  -- Shodan search expression.
        page  -- first result page to fetch.
        limit -- number of pages to fetch (<= 0 means no page limit).

        Raises FuzzExceptPluginLoadError when the shodan module is missing
        and FuzzExceptMissingAPIKey when no API key is configured.
        """
        if IMPORTED_SHODAN is False:
            raise FuzzExceptPluginLoadError(
                "shodan module not imported. Please, install shodan using pip"
            )

        key = Facade().sett.get("plugins", "shodan_apikey")
        if not key:
            raise FuzzExceptMissingAPIKey(
                "A Shodan api key is needed. Please check ~/.wfuzz/wfuzz.ini"
            )

        self.api = shodan.Shodan(key)
        self._dork = dork
        self._page = MyCounter(page)
        # One past the last page to fetch, or -1 for "no limit".
        self._page_limit = self._page() + limit if limit > 0 else -1

        self.results_queue = Queue(self.MAX_ENQUEUED_RES)
        self.page_queue = Queue()

        self._threads = []

        self._started = False
        self._cancel_job = False

    def _do_search(self):
        """Worker loop: fetch queued pages and enqueue their matches.

        A ``None`` page is the shutdown sentinel (one is queued per worker
        by _stop).  Hitting the page limit or a "no more results" /
        "no more credits" API error enqueues ``None`` on the results queue
        to signal exhaustion; any other shodan.APIError is forwarded to the
        consumer as an object on the results queue.
        """
        while 1:
            page = self.page_queue.get()
            if page is None:
                # Shutdown sentinel from _stop().
                self.page_queue.task_done()
                break

            if self._cancel_job:
                # Draining mode: consume remaining pages without querying.
                self.page_queue.task_done()
                continue

            if self._page_limit > 0 and page >= self._page_limit:
                self.page_queue.task_done()
                self.results_queue.put(None)
                continue

            try:
                results = self.api.search(self._dork, page=page)
                for item in results["matches"]:
                    if not self._cancel_job:
                        self.results_queue.put(item)

                self.page_queue.task_done()
                # Schedule the next page unless we are shutting down.
                if not self._cancel_job:
                    self.page_queue.put(self._page.inc())
            except shodan.APIError as e:
                self.page_queue.task_done()
                if "Invalid page size" in str(e):
                    self.results_queue.put(None)
                elif "Insufficient query credits" in str(e):
                    self.results_queue.put(None)
                else:
                    self.results_queue.put(e)
                continue

    def __iter__(self):
        return self

    def _start(self):
        """Spawn the worker threads and queue the first page(s)."""
        for th_n in range(self.NUM_OF_WORKERS):
            worker = Thread(target=self._do_search)
            # FIX: Thread.setName() is a deprecated alias (Python 3.10+);
            # assign the `name` attribute directly instead.
            worker.name = "_do_search_{}".format(str(th_n))
            self._threads.append(worker)
            worker.start()

        self.page_queue.put(self._page())
        if not self.SLOW_START:
            for _ in range(self.NUM_OF_WORKERS - 1):
                self.page_queue.put(self._page.inc())

    def _stop(self):
        """Cancel outstanding work and join all worker threads."""
        self._cancel_job = True

        # One None sentinel per worker so every _do_search loop exits.
        for th in self._threads:
            self.page_queue.put(None)

        self.page_queue.join()

        for th in self._threads:
            th.join()

        self._threads = []

        self.results_queue.put(None)

    def __next__(self):
        """Return the next match dict; StopIteration when exhausted."""
        if not self._started:
            self._start()
            self._started = True

        res = self.results_queue.get()
        self.results_queue.task_done()

        if res is None:
            # Reset state so the iterator could be started again.
            self._stop()
            self._cancel_job = False
            self._started = False
            raise StopIteration
        elif isinstance(res, Exception):
            self._stop()
            raise res

        return res