mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-31 05:00:41 +00:00
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@232 e7d03fd6-7b0d-0410-9947-9c21f3af8025
58 lines
1.3 KiB
Text
58 lines
1.3 KiB
Text
# /robots.txt for http://www.musi-cal.com/
|
|
# See http://info.webcrawler.com/mak/projects/robots/norobots.html
|
|
# Skip Montanaro (skip@mojam.com)
|
|
# - adapted from the robots.txt file at http://web.nexor.co.uk/
|
|
|
|
# disallow a bunch of ill-behaved user agents (doubt this will deter them...)
|
|
|
|
User-agent: ExtractorPro
|
|
Disallow: /
|
|
|
|
User-agent: EmailSiphon
|
|
Disallow: /
|
|
|
|
User-agent: EmailWolf
|
|
Disallow: /
|
|
|
|
User-agent: CherryPickerSE/1.0
|
|
Disallow: /
|
|
|
|
User-agent: CherryPickerElite/1.0
|
|
Disallow: /
|
|
|
|
User-agent: EmailCollector/1.0
|
|
Disallow: /
|
|
|
|
User-agent: EmailWolf 1.00
|
|
Disallow: /
|
|
|
|
User-agent: Crescent Internet ToolPak HTTP OLE Control v.1.0
|
|
Disallow: /
|
|
|
|
User-agent: EmailSiphon
|
|
Disallow: /
|
|
|
|
User-agent: Mozilla/2.0 (compatible; NEWT ActiveX; Win32)
|
|
Disallow: /
|
|
|
|
# default rules for every user agent not listed above
|
|
|
|
User-agent: *
|
|
Disallow: /ccrd # not useful to spiders
|
|
Disallow: /click # not useful to spiders
|
|
Disallow: /search # dynamic
|
|
Disallow: /hc # dynamic
|
|
Disallow: /subbatch # dynamic
|
|
Disallow: /vadd # dynamic
|
|
Disallow: /vsearch # dynamic
|
|
Disallow: /vedit # dynamic
|
|
Disallow: /vdelete # dynamic
|
|
Disallow: /cgi-bin # dynamic
|
|
Disallow: /images/ # useless images
|
|
Disallow: /icons/ # useless images
|
|
Disallow: /concerts/ # deprecated URL form
|
|
Disallow: /conferences # defunct
|
|
Disallow: /musician # defunct
|
|
Disallow: /~skip/volkswagen # defunct
|
|
Disallow: /%7Eskip/volkswagen # defunct
|
|
|