##
# This file is part of WhatWeb and may be subject to
# redistribution and commercial restrictions. Please see the WhatWeb
# web site for more information on licensing and terms of use.
# http://www.morningstarsecurity.com/research/whatweb
##
# Version 0.3 # 2011-03-23 #
# Removed aggressive section
##
# Version 0.2 #
# Added aggressive /robots.txt retrieval
##
Plugin.define "robots_txt" do
author "Brendan Coles <bcoles@gmail.com>" # 2010-10-22
version "0.3"
description "This plugin identifies robots.txt files and extracts both allowed and disallowed directories. - More Info: http://www.robotstxt.org/"
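
# Usage sketch (assuming a standard WhatWeb install; not part of the plugin):
# this passive plugin only fires when the scanned path is /robots.txt, so
# point WhatWeb at that file directly, e.g.:
#   ./whatweb http://www.example.com/robots.txt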

# Google results as at 2011-03-23 #
# 920 for inurl:robots.txt filetype:txt

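# Illustrative robots.txt (hypothetical content, following the format described
# at http://www.robotstxt.org/) showing the directives matched below:
#
#   User-agent: *
#   Disallow: /admin/
#   Disallow: /backup/
#   Allow: /public/
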
# Passive #
def passive
	m=[]

	# Extract directories if current file is robots.txt
	if @base_uri.path == "/robots.txt" and @body =~ /^User-agent:/i

		# File Exists
		m << { :name=>"File Exists" }

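		# Note: String#scan with a capture group returns an array of
		# one-element arrays, e.g. [["/admin/"], ["/backup/"]]; that raw
		# result is stored under :string for each directive below.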
		# Disallow
		if @body =~ /^Disallow:[\s]*(.+)$/i
			m << { :string=>@body.scan(/^Disallow:[\s]*(.+)$/i) }
		end

		# Allow
		if @body =~ /^Allow:[\s]*(.+)$/i
			m << { :string=>@body.scan(/^Allow:[\s]*(.+)$/i) }
		end

	end

	# Return passive matches
	m
end

end