1# robots.txt for http://www.wikihow.com
2# based on wikipedia.org's robots.txt
3#
4# Crawlers that are kind enough to obey, but which we'd rather not have
5# unless they're feeding search engines.
6#Sitemap: http://www.wikihow.com/sitemap_index.xml
7#
# If your bot can throttle itself via the 'Crawl-delay' directive or a
# similar instruction, please let us know.  We can add it to our robots.txt.
10#
# Friendly, low-speed bots are welcome to view article pages, but please
# stay away from dynamically generated pages. Article pages contain our
# site's real content.
14
# Crawlers identifying as archive.org are barred only from /api.php,
# /index.php and any path beginning with /Special:.
User-agent: archive.org
Disallow: /api.php
Disallow: /index.php
Disallow: /Special:

# Crawlers that are shut out of the whole site. Some bots are known to be
# trouble, particularly those designed to copy entire sites; please obey
# robots.txt. Every user agent listed here shares the single "Disallow: /"
# rule that closes this group, so do not insert blank lines between the
# entries.
User-agent: DOC
User-agent: Download Ninja
User-agent: Fetch
User-agent: HMSE_Robot
User-agent: HTTrack
# k2spider does not follow robots.txt anyway, but it is listed regardless.
User-agent: k2spider
User-agent: larbin
User-agent: libwww
User-agent: linko
User-agent: Microsoft.URL.Control
User-agent: MSIECrawler
# NPBot requests many pages per second.
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
User-agent: Offline Explorer
User-agent: sitecheck.internetseer.com
User-agent: SiteSnagger
User-agent: Teleport
User-agent: TeleportPro
User-agent: UbiCrawler
User-agent: WebCopier
# WebReaper is a capture bot that downloads gazillions of pages with no
# public benefit.
# http://www.webreaper.net/
User-agent: WebReaper
User-agent: WebStripper
User-agent: WebZIP
# wget in recursive mode uses too many resources for us. Please read the
# man page and use it properly; for instance, the --wait option sets the
# delay between hits. Please wait 3 seconds between each request.
User-agent: wget
User-agent: Xenu
User-agent: Zao
User-agent: Zealbot
User-agent: ZyBORG
Disallow: /
111
# Default rules for every crawler that is not matched by a more specific
# User-agent group elsewhere in this file.
User-agent: *
# Exceptions: specific Special: pages and index.php views that may be
# fetched even though /Special: and /index.php are disallowed below.
# Paths are case-sensitive, hence both Categorylisting spellings.
Allow: /Special:AllPages
Allow: /Special:Block
Allow: /Special:BlockList
Allow: /Special:Categorylisting
Allow: /Special:CategoryListing
Allow: /Special:Charity
Allow: /Special:EmailUser
Allow: /Special:LSearch
Allow: /Special:NewPages
Allow: /Special:ReindexedPages
Allow: /Special:PopularPages
Allow: /Special:QABox
Allow: /Special:SearchAd
Allow: /Special:Sitemap
Allow: /Special:ThankAuthors
Allow: /Special:UserLogin
Allow: /index.php?*action=credits
Allow: /index.php?*MathShowImage
Allow: /index.php?*printable
# Paths crawlers must not fetch. "*" matches any run of characters.
Disallow: /w/
Disallow: /forum/
Disallow: /index.php
Disallow: /*feed=rss
Disallow: /*action=delete
Disallow: /*action=history
Disallow: /Special:
Disallow: /images/samplepdfs
Disallow: /images/sampledocs*doc
Disallow: /images/sampledocs*pdf
Disallow: /images/sampledocs*txt
Disallow: /*platform=
Disallow: /*variant=
# NOTE: this group used to repeat the Disallow list above as "Noindex:"
# lines. "Noindex" is not part of the Robots Exclusion Protocol and Google
# stopped honouring it in robots.txt on 2019-09-01, so those lines had no
# effect and were removed. To keep a page out of search indexes, serve a
# robots meta tag or an X-Robots-Tag response header instead (the page must
# be crawlable for the tag to be seen).
158