1#!/usr/bin/env ruby
2=begin
3
4.$$$     $.                                   .$$$     $.
5$$$$     $$. .$$$  $$$ .$$$$$$.  .$$$$$$$$$$. $$$$     $$. .$$$$$$$. .$$$$$$.
6$ $$     $$$ $ $$  $$$ $ $$$$$$. $$$$$ $$$$$$ $ $$     $$$ $ $$   $$ $ $$$$$$.
7$ `$     $$$ $ `$  $$$ $ `$  $$$ $$' $ `$ `$$ $ `$     $$$ $ `$      $ `$  $$$'
8$. $     $$$ $. $$$$$$ $. $$$$$$ `$  $. $  :' $. $     $$$ $. $$$$   $. $$$$$.
9$::$  .  $$$ $::$  $$$ $::$  $$$     $::$     $::$  .  $$$ $::$      $::$  $$$$
10$;;$ $$$ $$$ $;;$  $$$ $;;$  $$$     $;;$     $;;$ $$$ $$$ $;;$      $;;$  $$$$
11$$$$$$ $$$$$ $$$$  $$$ $$$$  $$$     $$$$     $$$$$$ $$$$$ $$$$$$$$$ $$$$$$$$$'
12
13
14WhatWeb - Next generation web scanner.
15Author: Andrew Horton aka urbanadventurer
16
17Homepage: http://www.morningstarsecurity.com/research/whatweb
18
19Copyright 2009-2016 Andrew Horton <andrew at morningstarsecurity dot com>
20
21This file is part of WhatWeb.
22
23WhatWeb is free software: you can redistribute it and/or modify
24it under the terms of the GNU General Public License as published by
25the Free Software Foundation, either version 2 of the License, or
26(at your option) any later version.
27
28WhatWeb is distributed in the hope that it will be useful,
29but WITHOUT ANY WARRANTY; without even the implied warranty of
30MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31GNU General Public License for more details.
32
33You should have received a copy of the GNU General Public License
34along with WhatWeb.  If not, see <http://www.gnu.org/licenses/>.
35=end
36
37
38#require 'profile'
39require 'getoptlong'
40require 'pp'
41require 'net/http'
42require 'open-uri'
43require 'cgi'
44require 'thread'
45require 'tempfile'
46require 'rbconfig'  # detect environment, e.g. windows or linux
47require 'resolv'
48require 'resolv-replace' # asynchronous DNS
49require 'open-uri'
50
51
52
## Load path setup - this has to happen before any lib/ file is required.
# Put the directory that holds this script at the front of the load path,
# unless it is already listed in either its raw or its expanded form.
script_dir = File.dirname(__FILE__)
script_dir_abs = File.expand_path(script_dir)
already_listed = $LOAD_PATH.include?(script_dir) || $LOAD_PATH.include?(script_dir_abs)
$LOAD_PATH.unshift(script_dir_abs) unless already_listed
$LOAD_PATH.push("/usr/local/lib/ruby/site_ruby/2.7/whatweb")

# When the script was started through a symlink, resolve every link in the
# chain and add the directory of the real file as well.
if File.symlink?(__FILE__)
  require 'pathname'
  $LOAD_PATH.push(File.dirname(Pathname.new(__FILE__).realpath))
end
64
65
# Ruby version compatibility: load the HTTP extension that matches the running
# interpreter, or stop with a message when the interpreter is not supported.
case RUBY_VERSION
when /^1\.9/
	require 'digest/md5'
	require 'lib/extend-http_ruby1.9.rb'
when /^2\./
	require 'digest/md5'
	require 'lib/extend-http_ruby2.rb'
when /^1\.8/
	puts "Sorry but Ruby 1.8 is not supported. WhatWeb requires Ruby 1.9 or later."
	exit 1
else
	puts "Unsupported version of Ruby"
	exit 1
end
84
85
86### gem detection & loading
# Report whether the gem called +gemname+ is installed.
# Uses Gem::Specification.find_by_name when this rubygems provides it and
# falls back to the old Gem.available? API otherwise.
def gem_available?(gemname)
	modern_api = defined?(Gem::Specification) && defined?(Gem::Specification.find_by_name)
	return gem_available_new_rubygems?(gemname) if modern_api

	gem_available_old_rubygems?(gemname)
end
95
# Fallback for old rubygems releases (Ruby 1.8.7 patchlevel 302, Debian stable
# in Aug 2012): simply hands the question to Gem.available?.
def gem_available_old_rubygems?(gemname)
	installed = Gem.available?(gemname)
	installed
end
100
# Ask Gem::Specification for +gemname+. A LoadError from the lookup (rubygems
# raises a LoadError subclass for unknown gems) is reported as false.
def gem_available_new_rubygems?(gemname)
	Gem::Specification.find_by_name(gemname) ? true : nil
rescue LoadError
	false
end
108
# Optional gems: each one is required when rubygems can find it and is
# skipped otherwise.
gems = ["json", "mongo", "rchardet"]

gems.each do |optional_gem|
	begin
		require 'rubygems' # rubygems itself is optional
		require optional_gem if gem_available?(optional_gem)
	rescue LoadError
		# a gem that fails to load is not fatal unless debugging is switched on
		raise if $WWDEBUG==true
	end
end
123
124require 'lib/target.rb'
125require 'lib/plugins.rb'
126require 'lib/output.rb'
127require 'lib/colour.rb'
128require 'lib/tld.rb'
129require 'lib/version_class.rb'
130require 'lib/http-status.rb'
131HTTP_Status.initialize
132
# Every "plugins" and "my-plugins" directory found below a LOAD_PATH entry
# (next to the whatweb script, under /usr/share/whatweb, etc.). All "plugins"
# hits come first, followed by all "my-plugins" hits.
# File.exist? is used because File.exists? is deprecated and was removed in Ruby 3.2.
PLUGIN_DIRS = [ "plugins", "my-plugins" ].map { |subdir|
	$LOAD_PATH.map { |base| "#{base}/#{subdir}" }.select { |candidate| File.exist?(candidate) }
}.flatten
135
# Program-wide settings live in global variables; the command-line parser
# overwrites these defaults.

# identity
$VERSION = "0.4.8-dev"
$USER_AGENT = "WhatWeb/" + $VERSION

# diagnostics and console output
$WWDEBUG = false        # raise exceptions in plugins, etc
$verbose = 0            # lower case because $VERBOSE is reserved in ruby
$use_colour = "auto"
$QUIET = false
$NO_ERRORS = false
$OUTPUT_ERRORS = nil

# scanning behaviour
$AGGRESSION = 1
$FOLLOW_REDIRECT = "always"
$MAX_REDIRECTS = 10
$MAX_THREADS = 25
$NO_THREADS = false
$WAIT = nil
$HTTP_OPEN_TIMEOUT = 15
$HTTP_READ_TIMEOUT = 30
$PLUGIN_TIMES = Hash.new(0)

# proxy
$USE_PROXY = false
$PROXY_HOST = nil
$PROXY_PORT = 8080
$PROXY_USER = nil
$PROXY_PASS = nil

# target rewriting
$URL_PREFIX = ""
$URL_SUFFIX = ""
$URL_PATTERN = nil

# request headers and credentials
$CUSTOM_HEADERS = {}
$BASIC_AUTH_USER = nil
$BASIC_AUTH_PASS = nil
165
166
167### matching
168
# Fuzzy matching helper: reduce the HTML in +b+ to a comma separated list of
# its lower-cased tag names in document order. Anything that looks like a tag
# between <script> and </script> is left out; comments (!--) and noscript
# sections get no special treatment.
def make_tag_pattern(b)
	inside_script = false
	tags = []

	b.scan(/<([^\s>]*)/).flatten.each do |raw_tag|
		tag = raw_tag.downcase
		if tag == "/script"
			# the closing tag always ends the script section and is always kept
			inside_script = false
			tags << tag
		else
			tags << tag unless inside_script
			inside_script = true if tag == "script"
		end
	end

	tags.join(",")
end
184
# Return a copy of +s+ with the five predefined entities (&quot; &apos; &amp;
# &lt; &gt;) replaced by the characters they stand for. +s+ is not modified.
#
# All entities are replaced in one pass, so text produced by a replacement is
# never decoded a second time: "&amp;lt;" becomes "&lt;", not "<".
def decode_html_entities(s)
	html_entities = { "&quot;"=>'"', "&apos;"=>"'", "&amp;"=>"&", "&lt;"=>"<", "&gt;"=>">" }
	s.gsub(/&(?:quot|apos|amp|lt|gt);/) { |entity| html_entities[entity] }
end
191
# Translate a certainty percentage into a word: 0..49 is "maybe", 50..99 is
# "probably" and 100 is "certain". Any other value gives nil.
def certainty_to_words(p)
	labels = [ [0..49, "maybe"], [50..99, "probably"], [100, "certain"] ]
	_bounds, word = labels.find { |bounds, _label| bounds === p }
	word
end
202
# Random lower-case base-36 string of up to eight characters, for plugins that
# want a random component in the URLs they request.
def randstr
	upper_bound = 36 ** 8
	rand(upper_bound).to_s(36)
end
207
208
# Evaluate a Google Hacking Database style query against one response.
#
# ghdb     - query string; supports intitle:, filetype:, inurl: (each with a
#            bare word or a "quoted phrase"), plain words, "quoted phrases",
#            +word and -word
# body     - response body to search
# meta     - unused
# status   - unused
# base_uri - URI of the response, may be nil
#
# Returns true only when at least one condition was checked and all checked
# conditions hold. Terms are compared case-insensitively and literally.
#
# The operator terms are taken from the flattened scan results. Calling to_s
# on the nested scan array gives its inspect form ('[["foo"]]') on Ruby 1.9
# and later, which could never match.
def match_ghdb(ghdb, body, meta, status, base_uri)
	# this could be made faster by creating code to eval once for each plugin

	pp "match_ghdb", ghdb if $verbose > 2

	matches = [] # filled with true or false; the query succeeds if all are true
	s = ghdb

	# intitle: every term has to occur inside the <title> element
	if s =~ /intitle:/i
		# either the following words enclosed in "s or the next word, never both
		intitle = (s.scan( /intitle:"([^"]*)"/i ) + s.scan( /intitle:([^"]\w+)/i )).flatten
		intitle.each do |term|
			matches << !( body =~ /<title>[^<]*#{Regexp.escape(term)}[^<]*<\/title>/i ).nil?
		end
		# strip out the intitle: part
		s = s.gsub( /intitle:"([^"]*)"/i, '' ).gsub( /intitle:([^"]\w+)/i, '' )
	end

	# filetype: simple check that the URL path ends in one of the given types.
	# Nothing is recorded when there is no URI or the path is empty.
	if s =~ /filetype:/i
		filetypes = (s.scan( /filetype:"([^"]*)"/i ) + s.scan( /filetype:([^"]\w+)/i )).flatten
		path = base_uri.nil? ? nil : base_uri.path.to_s.split("?")[0]
		unless path.nil? or filetypes.empty?
			matches << filetypes.any? { |ext| !( path =~ /#{Regexp.escape(ext)}$/i ).nil? }
		end
		s = s.gsub( /filetype:"([^"]*)"/i, '' ).gsub( /filetype:([^"]\w+)/i, '' )
	end

	# inurl: can occur multiple times, every term has to occur in the URL
	if s =~ /inurl:/i
		inurl = (s.scan( /inurl:"([^"]*)"/i ) + s.scan( /inurl:([^"]\w+)/i )).flatten
		inurl.each { |term| matches << !( base_uri.to_s =~ /#{Regexp.escape(term)}/i ).nil? }
		# strip out the inurl: part
		s = s.gsub( /inurl:"([^"]*)"/i, '' ).gsub( /inurl:([^"]\w+)/i, '' )
	end

	# split what is left into words, keeping quoted phrases together, then
	# remove the quotes and sort
	remaining_words = s.scan( /([^ "]+)|("[^"]+")/i ).flatten.compact.each { |w| w.delete!('"') }.sort.uniq

	pp "Remaining GHDB words", remaining_words if $verbose > 2

	remaining_words.each do |w|
		if w[0..0] == '-'
			# a leading - means the word must NOT occur in the body
			matches << ( body =~ /#{Regexp.escape(w[1..-1])}/i ).nil?
		else
			w = w[1..-1] if w[0..0] == '+' # a leading + is ignored
			matches << !( body =~ /#{Regexp.escape(w)}/i ).nil?
		end
	end

	pp matches if $verbose > 2

	# true only if something was checked and every check passed
	!matches.empty? && matches.all?
end
272
273
274### targets
275
# Build the list of targets to scan.
#
# cmdline_args - targets given on the command line (not modified)
# inputfile    - optional file with one target per line; blank lines and
#                lines starting with # are skipped
# pluginlist   - unused, kept for compatibility with existing callers
#
# Address ranges (anything made only of digits, ".", "-", "*" and "/" that is
# not a plain dotted number) are expanded with `nmap -n -sL` and appended
# after the other targets. Targets naming an existing file are returned
# unchanged. Every other target gets $URL_PATTERN, $URL_PREFIX and
# $URL_SUFFIX applied, plus "http://" when it has no scheme.
#
# Returns an Array of Strings.
def make_target_list( cmdline_args, inputfile=nil, pluginlist = nil )
	url_list = cmdline_args

	# read each line as a url; File.readlines closes the file when done
	if !inputfile.nil? and File.exist?(inputfile)
		pp "loading input file: #{inputfile}" if $verbose > 2
		file_targets = File.readlines(inputfile).map { |line| line.strip }
		url_list += file_targets.reject { |line| line.empty? or line.start_with?("#") }
	end

	# \A and \z anchor the whole string, so a multi-line argument can never be
	# mistaken for a range
	range_pattern = /\A[0-9\.\-*\/]+\z/
	plain_ip_pattern = /\A[\d\.]+\z/
	ranges, url_list = url_list.partition { |x| (x =~ range_pattern) && !(x =~ plain_ip_pattern) }

	genrange = ranges.map do |x|
		begin
			# argument array: no shell is involved, stderr is merged into stdout
			listing = IO.popen(["nmap", "-n", "-sL", x, { :err => [:child, :out] }]) { |io| io.read }
			listing.scan(/(?:[0-9]{1,3}\.){3}[0-9]{1,3}/)
		rescue Errno::ENOENT
			error "Target ranges require nmap to be in the path"
			[]
		end
	end.flatten
	url_list += genrange

	# make urls friendlier: files stay as they are, everything else is
	# assumed to be http:// unless it names a scheme (http, https, ftp, etc)
	url_list.map do |x|
		next x if File.exist?(x)

		# block form: backslashes in the target are inserted literally
		x = $URL_PATTERN.gsub('%insert%') { x } if $URL_PATTERN
		x = $URL_PREFIX + x + $URL_SUFFIX

		x =~ /^[a-z]+:\/\// ? x : "http://" + x
	end
end
323
# Take the next target off $targets, skipping any target found in
# $recent_targets.
#
# When $targets is empty while other threads are still alive, the method
# keeps yielding with Thread.pass and retries. It returns nil once $targets
# is empty and only the calling thread is left.
#
# The returned value is appended to $recent_targets, which is capped at 100
# entries by dropping the OLDEST one (shift). Dropping with pop would remove
# the entry that was just pushed and freeze the list on the first 100 targets.
def next_target
	t = nil

	puts "Target List:" + $targets.inspect if $verbose > 2

	while $recent_targets.include?(t) or t.nil?
		t = $targets.shift
		puts "# t at the end of the $targets list" if $verbose > 2
		# t is nil at the end of the $targets list
		if t.nil?
			puts "t is nil" if $verbose > 2
			if $targets.empty?
				if Thread.list.size > 1
					if $verbose > 2
						puts "Thread list size: #{Thread.list.size}"
						Thread.list.each do |thread|
							puts "Thread: #{thread.inspect} is #{thread.status}"
						end
					end
					# let the other threads run, then look at $targets again
					Thread.pass
				else
					puts "breaking now" if $verbose > 2
					break
				end
			end
		end
	end

	puts "Recent Targets:" + $recent_targets.join(",") if $verbose > 2

	$recent_targets.push t
	$recent_targets.shift if $recent_targets.size > 100 # only the last 100 matter

	t
end
360
# Backwards compatible convenience method for plugins: builds a Target for
# +url+, opens it and returns
# [status, uri, ip, body, headers, raw_headers].
def open_target( url )
	target = Target.new(url).tap { |fresh| fresh.open }
	[
		target.status,
		target.uri,
		target.ip,
		target.body,
		target.headers,
		target.raw_headers
	]
end
367
368### output
369
# Report an error message.
#
# Does nothing when $NO_ERRORS is set. Otherwise the message goes to STDERR
# (through red() when $use_colour is "auto" or "always") and to
# $OUTPUT_ERRORS when that is set.
#
# Output happens under the $semaphore mutex once it exists. If the calling
# thread already holds the mutex, it is left locked for the caller to
# release; unlocking it here would make the caller's own unlock raise
# ThreadError. A mutex taken here is released even if writing fails.
#
# Returns nil.
def error( s )
	return if $NO_ERRORS

	mutex = defined?( $semaphore ) ? $semaphore : nil
	acquired = false
	if mutex
		begin
			mutex.lock
			acquired = true
		rescue ThreadError
			# the current thread already holds the lock. This was expected.
		end
	end

	begin
		if ( $use_colour=="auto" ) or ( $use_colour=="always" )
			STDERR.puts red( s )
		else
			STDERR.puts s
		end
		STDERR.flush
		$OUTPUT_ERRORS.out( s ) unless $OUTPUT_ERRORS.nil?
	ensure
		mutex.unlock if acquired
	end
	nil
end
392
# Wrap a string at word boundaries. Used by plugin_info.
#
# s     - text to wrap; it is split on whitespace
# width - maximum line width, must be greater than zero (default 10)
#
# Returns an Array of lines. Lines built from whole words keep a trailing
# space after every word, as callers have always received them. Words longer
# than +width+ are cut into +width+ sized pieces, one piece per line. Empty
# lines are never emitted.
#
# Raises ArgumentError when width is not positive (splitting a long word
# would otherwise never terminate).
def word_wrap( s,width=10 )
	raise ArgumentError, "width must be greater than zero" unless width > 0

	ret=[]
	line=""
	s.split.each do |word|
		# the word and its trailing space still fit on the current line
		if line.size + word.size + 1 <= width
			line += word + " "
			next
		end

		# finish the current line, but never emit an empty one
		ret << line unless line.empty?

		if word.size > width
			line = ""
			rest = word
			while rest.size > width
				ret << rest[0..(width-1)]
				rest = rest[width.to_i..-1]
			end
			ret << rest unless rest.size == 0
		else
			line = word + " "
		end
	end
	ret << line unless line.empty?

	ret
end
421
422### core
423
# Run every plugin in $plugins_to_use against +target+.
#
# Each plugin is waited for until it reports unlocked, then locked,
# initialised with the target, executed and unlocked again. A failing plugin
# is reported through error(), unlocked, and re-raised only when $WWDEBUG is
# on.
#
# Returns an Array of [plugin_name, result] pairs for the plugins whose
# result is neither nil nor empty.
def run_plugins(target)
	$plugins_to_use.each_with_object([]) do |(name, plugin), results|
		outcome = nil
		begin
			# give way to other threads until the plugin is free
			until !plugin.locked?
				puts "Waiting for plugin:#{name} to unlock" if $verbose > 2
				Thread.pass
			end
			plugin.lock

			plugin.init(target)
			outcome = plugin.x # eXecute the plugin

			plugin.unlock
		rescue Exception => err
			error("ERROR: Plugin #{name} failed for #{target.to_s}. #{err}")
			plugin.unlock
			raise if $WWDEBUG == true
		end

		next if outcome.nil? || outcome.empty?
		results << [name, outcome]
	end
end
454
455
456
457
# Print the complete usage help to standard output: banner, version line,
# every option group and the usage examples. When one of the optional gems
# (json, mongo, rchardet) is not installed, a section naming the missing gems
# is printed at the end. The defaults shown are read from the global settings.
def usage_full()
puts"
.$$$     $.                                   .$$$     $.
$$$$     $$. .$$$  $$$ .$$$$$$.  .$$$$$$$$$$. $$$$     $$. .$$$$$$$. .$$$$$$.
$ $$     $$$ $ $$  $$$ $ $$$$$$. $$$$$ $$$$$$ $ $$     $$$ $ $$   $$ $ $$$$$$.
$ `$     $$$ $ `$  $$$ $ `$  $$$ $$' $ `$ `$$ $ `$     $$$ $ `$      $ `$  $$$'
$. $     $$$ $. $$$$$$ $. $$$$$$ `$  $. $  :' $. $     $$$ $. $$$$   $. $$$$$.
$::$  .  $$$ $::$  $$$ $::$  $$$     $::$     $::$  .  $$$ $::$      $::$  $$$$
$;;$ $$$ $$$ $;;$  $$$ $;;$  $$$     $;;$     $;;$ $$$ $$$ $;;$      $;;$  $$$$
$$$$$$ $$$$$ $$$$  $$$ $$$$  $$$     $$$$     $$$$$$ $$$$$ $$$$$$$$$ $$$$$$$$$'

"

puts "WhatWeb - Next generation web scanner version #{$VERSION}.\nDeveloped by Andrew Horton aka urbanadventurer and Brendan Coles."
puts "Homepage: http://www.morningstarsecurity.com/research/whatweb"
puts
puts "Usage: whatweb [options] <URLs>"
puts "
TARGET SELECTION:
  <TARGETs>\t\t\tEnter URLs, hostnames, IP addresses,
  \t\t\t\tfilenames, or nmap-format IP address ranges.
  --input-file=FILE, -i\t\tRead targets from a file. You can pipe
\t\t\t\thostnames or URLs directly with -i /dev/stdin.

TARGET MODIFICATION:
  --url-prefix\t\t\tAdd a prefix to target URLs.
  --url-suffix\t\t\tAdd a suffix to target URLs.
  --url-pattern\t\t\tInsert the targets into a URL.
\t\t\t\te.g. example.com/%insert%/robots.txt

AGGRESSION:
The aggression level controls the trade-off between speed/stealth and
reliability.
  --aggression, -a=LEVEL\tSet the aggression level. Default: 1.
  1. Stealthy\t\t\tMakes one HTTP request per target and also
  \t\t\t\tfollows redirects.
  3. Aggressive\t\t\tIf a level 1 plugin is matched, additional
  \t\t\t\trequests will be made.
  4. Heavy\t\t\tMakes a lot of HTTP requests per target. URLs
  \t\t\t\tfrom all plugins are attempted.

HTTP OPTIONS:
  --user-agent, -U=AGENT\tIdentify as AGENT instead of WhatWeb/#{$VERSION}.
  --header, -H\t\t\tAdd an HTTP header. eg \"Foo:Bar\". Specifying a
\t\t\t\tdefault header will replace it. Specifying an
\t\t\t\tempty value, e.g. \"User-Agent:\" will remove it.
  --follow-redirect=WHEN\tControl when to follow redirects. WHEN may be
\t\t\t\t`never', `http-only', `meta-only', `same-site',
\t\t\t\t`same-domain' or `always'. Default: #{$FOLLOW_REDIRECT}.
  --max-redirects=NUM\t\tMaximum number of redirects. Default: 10.

AUTHENTICATION:
  --user, -u=<user:password>\tHTTP basic authentication.
  --cookie, -c=COOKIES\t\tUse cookies, e.g. 'name=value; name2=value2'.

PROXY:
  --proxy\t\t\t<hostname[:port]> Set proxy hostname and port.
\t\t\t\tDefault: #{$PROXY_PORT}.
  --proxy-user\t\t\t<username:password> Set proxy user and password.

PLUGINS:
  --list-plugins, -l\t\tList all plugins.
  --info-plugins, -I=[SEARCH]\tList all plugins with detailed information.
\t\t\t\tOptionally search with keywords in a comma
\t\t\t\tdelimited list.
  --search-plugins=STRING\tSearch plugins for a keyword.
  --plugins, -p=LIST\t\tSelect plugins. LIST is a comma delimited set
\t\t\t\tof selected plugins. Default is all.
\t\t\t\tEach element can be a directory, file or plugin
\t\t\t\tname and can optionally have a modifier, +/-.
\t\t\t\tExamples: +/tmp/moo.rb,+/tmp/foo.rb
\t\t\t\ttitle,md5,+./plugins-disabled/
\t\t\t\t./plugins-disabled,-md5
\t\t\t\t-p + is a shortcut for -p +plugins-disabled.

  --grep, -g=STRING\t\tSearch for STRING in HTTP responses. Reports
\t\t\t\twith a plugin named Grep.
  --custom-plugin=DEFINITION\tDefine a custom plugin named Custom-Plugin,
\t\t\t\tExamples: \":text=>'powered by abc'\"
\t\t\t\t\":version=>/powered[ ]?by ab[0-9]/\"
\t\t\t\t\":ghdb=>'intitle:abc \\\"powered by abc\\\"'\"
\t\t\t\t\":md5=>'8666257030b94d3bdb46e05945f60b42'\"
\t\t\t\t\"{:text=>'powered by abc'}\"
  --dorks=PLUGIN\t\tList Google dorks for the selected plugin.

OUTPUT:
  --verbose, -v\t\t\tVerbose output includes plugin descriptions.
\t\t\t\tUse twice for debugging.
  --colour,--color=WHEN\t\tcontrol whether colour is used. WHEN may be
\t\t\t\t`never', `always', or `auto'.
  --quiet, -q\t\t\tDo not display brief logging to STDOUT.
  --no-errors\t\t\tSuppress error messages.

LOGGING:
  --log-brief=FILE\t\tLog brief, one-line output.
  --log-verbose=FILE\t\tLog verbose output.
  --log-errors=FILE\t\tLog errors.
  --log-xml=FILE\t\tLog XML format.
  --log-json=FILE\t\tLog JSON format.
  --log-sql=FILE\t\tLog SQL INSERT statements.
  --log-sql-create=FILE\t\tCreate SQL database tables.
  --log-json-verbose=FILE\tLog JSON Verbose format.
  --log-magictree=FILE\t\tLog MagicTree XML format.
  --log-object=FILE\t\tLog Ruby object inspection format.
  --log-mongo-database\t\tName of the MongoDB database.
  --log-mongo-collection\tName of the MongoDB collection.
\t\t\t\tDefault: whatweb.
  --log-mongo-host\t\tMongoDB hostname or IP address.
\t\t\t\tDefault: 0.0.0.0.
  --log-mongo-username\t\tMongoDB username. Default: nil.
  --log-mongo-password\t\tMongoDB password. Default: nil.

PERFORMANCE & STABILITY:
  --max-threads, -t\t\tNumber of simultaneous threads. Default: #{$MAX_THREADS}.
  --open-timeout\t\tTime in seconds. Default: #{$HTTP_OPEN_TIMEOUT}.
  --read-timeout\t\tTime in seconds. Default: #{$HTTP_READ_TIMEOUT}.
  --wait=SECONDS\t\tWait SECONDS between connections.
\t\t\t\tThis is useful when using a single thread.

HELP & MISCELLANEOUS:
  --short-help\t\t\tShort usage help.
  --help, -h\t\t\tComplete usage help.
  --debug\t\t\tRaise errors in plugins.
  --version\t\t\tDisplay version information.

EXAMPLE USAGE:
* Scan example.com.
  ./whatweb example.com
* Scan reddit.com slashdot.org with verbose plugin descriptions.
  ./whatweb -v reddit.com slashdot.org
* An aggressive scan of wired.com detects the exact version of WordPress.
  ./whatweb -a 3 www.wired.com
* Scan the local network quickly and suppress errors.
  whatweb --no-errors 192.168.0.0/24
* Scan the local network for https websites.
  whatweb --no-errors --url-prefix https:// 192.168.0.0/24
* Scan for crossdomain policies in the Alexa Top 1000.
  ./whatweb -i plugin-development/alexa-top-100.txt \\
  --url-suffix /crossdomain.xml -p crossdomain_xml\n"

	# name the optional gems that are not installed
	suggestions=""
	suggestions << "To enable JSON logging install the json gem.\n" unless gem_available?('json')
	suggestions << "To enable MongoDB logging install the mongo gem.\n" unless gem_available?('mongo')
	suggestions << "To enable character set detection and MongoDB logging install the rchardet gem.\n" unless gem_available?('rchardet')

	unless suggestions.empty?
		print "\nOPTIONAL DEPENDENCIES\n--------------------------------------------------------------------------------\n" + suggestions + "\n"
	end

	puts
end
609
# Print the short usage help to standard output: banner, version line, the
# most common options and the usage examples. It is shown when whatweb is
# started without arguments or with --short-help.
def usage_short()
puts"
.$$$     $.                                   .$$$     $.
$$$$     $$. .$$$  $$$ .$$$$$$.  .$$$$$$$$$$. $$$$     $$. .$$$$$$$. .$$$$$$.
$ $$     $$$ $ $$  $$$ $ $$$$$$. $$$$$ $$$$$$ $ $$     $$$ $ $$   $$ $ $$$$$$.
$ `$     $$$ $ `$  $$$ $ `$  $$$ $$' $ `$ `$$ $ `$     $$$ $ `$      $ `$  $$$'
$. $     $$$ $. $$$$$$ $. $$$$$$ `$  $. $  :' $. $     $$$ $. $$$$   $. $$$$$.
$::$  .  $$$ $::$  $$$ $::$  $$$     $::$     $::$  .  $$$ $::$      $::$  $$$$
$;;$ $$$ $$$ $;;$  $$$ $;;$  $$$     $;;$     $;;$ $$$ $$$ $;;$      $;;$  $$$$
$$$$$$ $$$$$ $$$$  $$$ $$$$  $$$     $$$$     $$$$$$ $$$$$ $$$$$$$$$ $$$$$$$$$'

"

puts "WhatWeb - Next generation web scanner version #{$VERSION}.\nDeveloped by Andrew Horton aka urbanadventurer and Brendan Coles."
puts "Homepage: http://www.morningstarsecurity.com/research/whatweb"
puts
puts "Usage: whatweb [options] <URLs>"
puts "
TARGET SELECTION:
  <TARGETs>\t\t\tEnter URLs, hostnames, IP addresses, or
  \t\t\t\tnmap-format IP ranges.
  --input-file=FILE, -i\t\tRead targets from a file.

AGGRESSION:
  --aggression, -a=LEVEL\tSet the aggression level. Default: 1.
  1. Stealthy\t\t\tMakes one HTTP request per target and also
  \t\t\t\tfollows redirects.
  3. Aggressive\t\t\tIf a level 1 plugin is matched, additional
  \t\t\t\trequests will be made.

PLUGINS:
  --list-plugins, -l\t\tList all plugins.
  --info-plugins, -I=[SEARCH]\tList all plugins with detailed information.
\t\t\t\tOptionally search with a keyword.
  --search-plugins=STRING\tSearch plugins for a keyword.
  --grep, -g=STRING\t\tSearch for STRING in HTTP responses. Reports
\t\t\t\twith a plugin named Grep.
OUTPUT:
  --verbose, -v\t\t\tVerbose output includes plugin descriptions.
\t\t\t\tUse twice for debugging.
  --colour,--color=WHEN\t\tcontrol whether colour is used. WHEN may be
\t\t\t\t`never', `always', or `auto'.

HELP & MISCELLANEOUS:
  --short-help\t\t\tThis short usage help.
  --help, -h\t\t\tComplete usage help.

EXAMPLE USAGE:
* Scan example.com.
  ./whatweb example.com
* Scan reddit.com slashdot.org with verbose plugin descriptions.
  ./whatweb -v reddit.com slashdot.org
* An aggressive scan of wired.com detects the exact version of WordPress.
  ./whatweb -a 3 www.wired.com
* Scan the local network quickly and suppress errors.
  whatweb --no-errors 192.168.0.0/24
* Scan the local network for HTTPS websites.
  whatweb --no-errors --url-prefix https:// 192.168.0.0/24
* Scan for crossdomain policies in the Alexa Top 1000.
  ./whatweb -i plugin-development/alexa-top-100.txt \\
  --url-suffix /crossdomain.xml -p crossdomain_xml

  Note: This is the short usage help.
  For the complete usage help use -h or --help.\n"

puts

end
678
679
# Started without any argument: show the short help right away (faster than
# going through option parsing) and stop.
if ARGV.empty?
	usage_short
	exit
end
684
# Starting values for everything the command-line options may fill in.
plugin_selection = nil
use_custom_plugin = use_custom_grep_plugin = false
input_file = nil
output_list = Array.new
mongo = { :use_mongo_log => false }
692
# Command-line option table. Each row holds the argument kind followed by the
# option names; the first name of a row is the one GetoptLong reports.
# Only -I takes an optional argument - optional arguments work badly with URLs.
opt_flag     = GetoptLong::NO_ARGUMENT
opt_value    = GetoptLong::REQUIRED_ARGUMENT
opt_optional = GetoptLong::OPTIONAL_ARGUMENT

option_table = [
	[ opt_flag,     '-h', '--help' ],
	[ opt_flag,     '--short-help' ],
	[ opt_flag,     '-v', '--verbose' ],
	[ opt_flag,     '-l', '--list-plugins' ],
	[ opt_value,    '-p', '--plugins' ],
	[ opt_optional, '-I', '--info-plugins', '--search-plugins' ],
	[ opt_value,    '--dorks' ],
	[ opt_value,    '--colour', '--color' ],
	[ opt_value,    '--log-object' ],
	[ opt_value,    '--log-brief' ],
	[ opt_value,    '--log-xml' ],
	[ opt_value,    '--log-json' ],
	[ opt_value,    '--log-json-verbose' ],
	[ opt_value,    '--log-magictree' ],
	[ opt_value,    '--log-verbose' ],
	[ opt_value,    '--log-mongo-collection' ],
	[ opt_value,    '--log-mongo-host' ],
	[ opt_value,    '--log-mongo-database' ],
	[ opt_value,    '--log-mongo-username' ],
	[ opt_value,    '--log-mongo-password' ],
	[ opt_value,    '--log-sql' ],
	[ opt_value,    '--log-sql-create' ],
	[ opt_value,    '--log-errors' ],
	[ opt_flag,     '--no-errors' ],
	[ opt_value,    '-i', '--input-file' ],
	[ opt_value,    '-U', '--user-agent' ],
	[ opt_value,    '-a', '--aggression' ],
	[ opt_value,    '-t', '--max-threads' ],
	[ opt_value,    '--follow-redirect' ],
	[ opt_value,    '--max-redirects' ],
	[ opt_value,    '--proxy' ],
	[ opt_value,    '--proxy-user' ],
	[ opt_value,    '--url-prefix' ],
	[ opt_value,    '--url-suffix' ],
	[ opt_value,    '--url-pattern' ],
	[ opt_value,    '--custom-plugin' ],
	[ opt_value,    '-g', '--grep' ],
	[ opt_value,    '--open-timeout' ],
	[ opt_value,    '--read-timeout' ],
	[ opt_value,    '--header', '-H' ],
	[ opt_value,    '--cookie', '-c' ],
	[ opt_value,    '--user', '-u' ],
	[ opt_value,    '--wait' ],
	[ opt_flag,     '--debug' ],
	[ opt_flag,     '--version' ],
	[ opt_flag,     '-q', '--quiet' ]
]

# GetoptLong expects the names first and the argument kind last
opts = GetoptLong.new(*option_table.map { |kind, *names| names + [kind] })
742
# Apply the parsed command-line options to the local and global settings.
# Options that only print information (plugin lists, help, version, SQL
# schema) do their work and exit immediately. Any invalid value raises; the
# message is reported through error() and the program exits with status 1.
begin
	opts.each do |opt, arg|
		case opt
			when '-i','--input-file'
				input_file=arg
			when '-l','--list-plugins'
				PluginSupport.load_plugins
				PluginSupport.plugin_list
				exit
			when '-p','--plugins'
				plugin_selection=arg
			when '-I','--info-plugins'
				PluginSupport.load_plugins
				PluginSupport.plugin_info(arg.split(","))
				exit
			when '--dorks'
				PluginSupport.load_plugins
				PluginSupport.plugin_dorks(arg)
				exit

			when '--color','--colour'
				# the option requires an argument, so arg is always a String here
				case arg.downcase
					when 'auto'
						$use_colour="auto"
					when 'always'
						$use_colour="always"
					when 'never'
						$use_colour=false
					else
						raise("--colour argument not recognized")
					end
			when '--log-object'
				output_list << OutputObject.new(arg)
			when '--log-brief'
				output_list << OutputBrief.new(arg)
			when '--log-xml'
				output_list << OutputXML.new(arg)
			when '--log-magictree'
				output_list << OutputMagicTreeXML.new(arg)
			when '--log-verbose'
				output_list << OutputVerbose.new(arg)
			when '--log-sql'
				output_list << OutputSQL.new(arg)
			when '--log-sql-create'
				PluginSupport.load_plugins("+")
				# delete the file if it already exists
				begin
					File.delete(arg)
				rescue
				end
				OutputSQL.new(arg).create_tables
				puts "SQL CREATE statements written to #{arg}"
				exit
			when '--log-json'
				if defined?(JSON)
					output_list << OutputJSON.new(arg)
				else
					raise("Sorry. The JSON gem is required for JSON output")
				end
			when '--log-json-verbose'
				if defined?(JSON)
					output_list << OutputJSONVerbose.new(arg)
				else
					raise("Sorry. The JSON gem is required for JSONVerbose output")
				end
			when '--log-mongo-collection','--log-mongo-host','--log-mongo-database','--log-mongo-username','--log-mongo-password'
				unless defined?(Mongo) and defined?(CharDet)
					raise("Sorry. The mongo and rchardet gems are required for Mongo output")
				end
				# --log-mongo-host is stored as mongo[:host], and so on
				mongo[opt.sub('--log-mongo-', '').to_sym]=arg
				mongo[:use_mongo_log]=true
			when '--log-errors'
				$OUTPUT_ERRORS = OutputErrors.new(arg)
			when '--no-errors'
				$NO_ERRORS = true
			when '-U','--user-agent'
				$USER_AGENT=arg
			when '-t','--max-threads'
				$MAX_THREADS=arg.to_i
			when '-a','--aggression'
				raise "Agression level must be 1,3, or 4. #{arg} is invalid." unless [1,3,4].include? arg.to_i
				$AGGRESSION=arg.to_i
			when '--proxy'
				$USE_PROXY=true
				$PROXY_HOST = arg.to_s.split(":")[0]
				$PROXY_PORT = arg.to_s.split(":")[1].to_i if arg.to_s.include?(":")
			when '--proxy-user'
				$PROXY_USER=arg.to_s.split(":")[0]
				# everything after the first colon is the password. Take the
				# capture itself: to_s on the scan result would store the
				# inspected array ('[["secret"]]') on Ruby 1.9 and later.
				proxy_pass = arg.to_s[/^[^:]*:(.+)/, 1]
				$PROXY_PASS = proxy_pass if proxy_pass
			when '-q','--quiet'
				$QUIET=true
			when '--url-prefix'
				$URL_PREFIX=arg
			when '--url-suffix'
				$URL_SUFFIX=arg
			when '--url-pattern'
				$URL_PATTERN=arg
			when '--custom-plugin'
				use_custom_plugin=true if PluginSupport.custom_plugin(arg)
			when '--grep','-g'
				use_custom_grep_plugin=true if PluginSupport.custom_plugin(arg,"grep")
			when '--follow-redirect'
				if ["never","http-only","meta-only","same-site","same-domain","always"].include?(arg.downcase)
					$FOLLOW_REDIRECT=arg.downcase
				else
					raise("Invalid --follow-redirect parameter.")
				end
			when '--max-redirects'
				$MAX_REDIRECTS=arg.to_i
			when '--open-timeout'
				$HTTP_OPEN_TIMEOUT=arg.to_i
			when '--read-timeout'
				$HTTP_READ_TIMEOUT=arg.to_i
			when '--wait'
				$WAIT = arg.to_i
			when '-H','--header'
				# "Name:value" - split at the first colon
				begin
					x=arg.scan(/([^:]+):(.*)/).flatten
					raise if x.empty?
					$CUSTOM_HEADERS[x.first]=x.last
				rescue
					raise("Invalid --header parameter.")
				end
			when '-c','--cookie'
				begin
					raise if arg.empty?
					$CUSTOM_HEADERS["Cookie"]=arg
				rescue
					raise("Cookie require a parameter, e.g. name=value; name2=value2")
				end
			when '-u','--user'
				$BASIC_AUTH_USER=arg.split(":").first
				# same extraction as for --proxy-user
				basic_auth_pass = arg.to_s[/^[^:]*:(.+)/, 1]
				$BASIC_AUTH_PASS = basic_auth_pass if basic_auth_pass
			when '--debug'
				$WWDEBUG = true
			when '--short-help'
				usage_short
				exit
			when '-h','--help'
				usage_full
				exit
			when '-v','--verbose'
				$verbose=$verbose+1
			when '--version'
				puts "WhatWeb version #{$VERSION} ( http://www.morningstarsecurity.com/research/whatweb/ )"
				exit
		end
	end
rescue Errno::EPIPE
	exit
rescue StandardError, GetoptLong::Error => err
	# Disable colours in Windows environments for errors in usage
	if RbConfig::CONFIG['host_os'] =~ /mswin|mingw/
		$use_colour = false
	end
	puts
	error err
	# a usage error is a failure, like the other error exits in this script
	exit 1
end
934
# sanity check
# On Windows hosts colour output is switched off, except when the colour
# setting is explicitly "always".
running_on_windows = RbConfig::CONFIG['host_os'] =~ /mswin|mingw/
$use_colour = false if running_on_windows and $use_colour != "always"
939
### PLUGINS
# The custom and grep plugins are only appended when a plugin selection
# string exists; a nil selection is handed to load_plugins untouched.
if plugin_selection
	plugin_selection += ",+Custom-Plugin" if use_custom_plugin
	plugin_selection += ",+Grep" if use_custom_grep_plugin
end

# load the plugins for this selection
$plugins_to_use = PluginSupport.load_plugins(plugin_selection)

# sanity check # nothing to run without plugins
unless $plugins_to_use.size > 0
	error "No plugins selected, exiting."
	exit 1
end

# optimise plugins
PluginSupport.precompile_regular_expressions
954
### OUTPUT
# Console output is chosen from the verbosity level ($verbose is incremented
# once per -v): brief when not verbose and not quiet, verbose for -v and
# above, plus the object output for -vv and above.
output_list << OutputBrief.new unless $QUIET or $verbose > 0 # by default output brief
output_list << OutputObject.new() if $verbose > 1 # object output if -vv
output_list << OutputVerbose.new() if $verbose > 0 # full output if -v

## output dependencies
# MongoDB logging is only enabled when the Charset plugin is among the
# loaded plugins; otherwise this is a fatal configuration error.
if mongo[:use_mongo_log]
	if $plugins_to_use.map { |a,b| a }.include?("Charset")
		output_list << OutputMongo.new(mongo)
	else
		error("MongoDB logging requires the Charset plugin to be activated. The Charset plugin is the slowest whatweb plugin, it not included by default, and resides in the plugins-disabled folder. Use ./whatweb -p +./plugins-disabled/Charset.rb to enable it.")
		# exit with a failure status, like the other fatal checks in this script
		exit 1
	end
end
969
## Headers
# Fall back to the default user agent only when no User-Agent header was
# supplied. An empty string counts as supplied, so it is not replaced here
# and is then removed by the clean-up below.
$CUSTOM_HEADERS["User-Agent"] ||= $USER_AGENT
# drop every header whose value is the empty string
$CUSTOM_HEADERS.reject! { |_name, value| value == "" }
973
### TARGETS
# Build the target list from the remaining command-line arguments and the
# optional input file (make_target_list is defined elsewhere).
$targets=make_target_list(ARGV, input_file, $plugins_to_use)
$recent_targets=[]

# fail if there is nothing to scan
unless $targets.size >= 1
	error "No targets selected"
	exit 1
end
984
# Global mutex, created before any worker thread starts. It is not used in
# this section; presumably code defined elsewhere synchronises on it.
$semaphore=Mutex.new
# In debug mode an unhandled exception in any thread aborts the whole run.
Thread.abort_on_exception = true if $WWDEBUG

# Worker threads started below that may still be running.
workers = []

# Main scan loop: every target returned by next_target (defined elsewhere)
# is scanned in its own thread. The loop ends when next_target returns
# nil/false.
while t = next_target
	worker = Thread.new(t) do |thistarget|
		begin
			target = Target.new(thistarget) # we set the target within the thread
		rescue => err
			error(err)
			next # leaves the block, which ends this worker
		end

		puts Thread.current.to_s + " started for " + target.to_s if $verbose>1
		sleep $WAIT unless $WAIT.nil? # wait

		# follow redirects: each pass opens the current target, runs the
		# plugins, reports, and then possibly swaps in the redirect target
		no_redirects = false
		num_redirects = 0
		while no_redirects == false do
			# files cannot redirect, so they get exactly one pass
			no_redirects=true if target.is_file?
			# give up once $MAX_REDIRECTS redirects have been followed
			if num_redirects == $MAX_REDIRECTS
				error("ERROR Too many redirects: #{target.to_s}")
				no_redirects=true
				next
			end

			begin
				target.open
			rescue => err
				error("ERROR Opening target: #{target.to_s} - #{err}")
				no_redirects = true # without this we can get stuck in a loop
				raise if $WWDEBUG
				next
			end

			if target.is_url? and target.status.nil?
				# assume all HTTP sites return a status
				no_redirects=true
				next
			end

			results = run_plugins(target)

			# reporting
			# multiple output plugins simultaneously, some stdout, some files
			output_list.each do |o|
				begin
					o.out(target, target.status, results)
				rescue => err
					# a failing logger must not stop the other loggers
					error("ERROR Logging failed: #{target.to_s} - #{err}")
					raise if $WWDEBUG==true
				end
			end

			# REDIRECTION
			unless no_redirects
				begin
					if newtarget = target.get_redirection_target
						num_redirects+=1
						target=Target.new(newtarget)
					else
						no_redirects=true
					end
				rescue => err
					error("ERROR Redirection broken: #{target.to_s} - #{err}")
					no_redirects=true
					raise if $WWDEBUG==true
				end
			end
		end # while no_redirects
	end # Thread.new
	workers << worker

	# throttle: do not hand out another target while too many threads exist
	# (Thread.list also counts the main thread, hence the +1)
	while Thread.list.size>($MAX_THREADS+1)
		puts "Thread list full, passing control" if $verbose>1
		#sleep 0.5
		Thread.pass
	end

	# forget finished workers so the list stays small on long target lists
	workers.delete_if { |w| !w.alive? }
end # while next_target

# Wait for the workers that are still running, so that nothing after this
# loop runs while they are still scanning and reporting.
# NOTE(review): next_target is not visible here; if it already waits for the
# workers before returning nil, these joins return immediately.
workers.each do |w|
	begin
		w.join
	rescue => err
		# join re-raises an exception that killed the worker; report it
		# instead of letting it abort the rest of the run
		error("ERROR Worker thread failed: #{err}")
	end
end
1065
# close output logs
output_list.each do |logger|
	logger.close
end

# shutdown plugins: call shutdown on every registered plugin
Plugin.registered_plugins.each { |_name, plugin| plugin.shutdown }
1073
1074#pp $PLUGIN_TIMES.sort_by {|x,y|y }
1075