Difference between revisions of "MediaWiki:Robots.txt"

From SacredWiki
Jump to navigation Jump to search
(Created page with "# # robots.txt for http://www.wikipedia.org/ and friends # # Please note: There are a lot of pages on this site, and there are # some misbehaved spiders out there that go _way...")
 
(Blanked the page)
 
Line 1: Line 1:
#
 
# robots.txt for http://www.wikipedia.org/ and friends
 
#
 
# Please note: There are a lot of pages on this site, and there are
 
# some misbehaved spiders out there that go _way_ too fast. If you're
 
# irresponsible, your access to the site may be blocked.
 
#
 
  
User-agent: Mediapartners-Google
 
Disallow:
 
 
User-agent: Fasterfox
 
Disallow: /
 
 
# Wikipedia work bots:
 
User-agent: IsraBot
 
Disallow:
 
 
User-agent: Orthogaffe
 
Disallow:
 
 
# Crawlers that are kind enough to obey, but which we'd rather not have
 
# unless they're feeding search engines.
 
User-agent: UbiCrawler
 
Disallow: /
 
 
User-agent: DOC
 
Disallow: /
 
 
User-agent: Zao
 
Disallow: /
 
 
# Some bots are known to be trouble, particularly those designed to copy
 
# entire sites. Please obey robots.txt.
 
User-agent: sitecheck.internetseer.com
 
Disallow: /
 
 
User-agent: Zealbot
 
Disallow: /
 
 
User-agent: MSIECrawler
 
Disallow: /
 
 
User-agent: SiteSnagger
 
Disallow: /
 
 
User-agent: WebStripper
 
Disallow: /
 
 
User-agent: WebCopier
 
Disallow: /
 
 
User-agent: Fetch
 
Disallow: /
 
 
User-agent: Offline Explorer
 
Disallow: /
 
 
User-agent: Teleport
 
Disallow: /
 
 
User-agent: TeleportPro
 
Disallow: /
 
 
User-agent: WebZIP
 
Disallow: /
 
 
User-agent: linko
 
Disallow: /
 
 
User-agent: HTTrack
 
Disallow: /
 
 
User-agent: Microsoft.URL.Control
 
Disallow: /
 
 
User-agent: Xenu
 
Disallow: /
 
 
User-agent: larbin
 
Disallow: /
 
 
User-agent: libwww
 
Disallow: /
 
 
User-agent: ZyBORG
 
Disallow: /
 
 
User-agent: Download Ninja
 
Disallow: /
 
 
#
 
# Sorry, wget in its recursive mode is a frequent problem.
 
# Please read the man page and use it properly; there is a
 
# --wait option you can use to set the delay between hits,
 
# for instance.
 
#
 
User-agent: wget
 
Disallow: /
 
 
#
 
# The 'grub' distributed client has been *very* poorly behaved.
 
#
 
User-agent: grub-client
 
Disallow: /
 
 
#
 
# Doesn't follow robots.txt anyway, but...
 
#
 
User-agent: k2spider
 
Disallow: /
 
 
#
 
# Hits many times per second, not acceptable
 
# http://www.nameprotect.com/botinfo.html
 
User-agent: NPBot
 
Disallow: /
 
 
# A capture bot, downloads gazillions of pages with no public benefit
 
# http://www.webreaper.net/
 
User-agent: WebReaper
 
Disallow: /
 
 
# Don't allow the wayback-machine to index user-pages
 
User-agent: ia_archiver
 
Disallow: /
 
#Disallow: /SacredWiki/User
 
#Disallow: /SacredWiki/Benutzer
 
 
#
 
# Friendly, low-speed bots are welcome viewing article pages, but not
 
# dynamically-generated pages please.
 
#
 
# Inktomi's "Slurp" can read a minimum delay between hits; if your
 
# bot supports such a thing using the 'Crawl-delay' or another
 
# instruction, please let us know.
 
#
 
User-agent: *
 
Disallow: /forum/
 
Disallow: /testforum/
 
Disallow: /t/
 
Disallow: /test/
 
Disallow: /SacredWiki/Especial:Search
 
Disallow: /SacredWiki/Especial%3ASearch
 
Disallow: /SacredWiki/Special:Random
 
Disallow: /SacredWiki/Special%3ARandom
 
Disallow: /SacredWiki/Special:Search
 
Disallow: /SacredWiki/Special%3ASearch
 
Disallow: /SacredWiki/Spesial:Search
 
Disallow: /SacredWiki/Spesial%3ASearch
 
Disallow: /SacredWiki/Spezial:Search
 
Disallow: /SacredWiki/Spezial%3ASearch
 
Disallow: /SacredWiki/Specjalna:Search
 
Disallow: /SacredWiki/Specjalna%3ASearch
 
Disallow: /SacredWiki/Speciaal:Search
 
Disallow: /SacredWiki/Speciaal%3ASearch
 
Disallow: /SacredWiki/Speciaal:Random
 
Disallow: /SacredWiki/Speciaal%3ARandom
 
Disallow: /SacredWiki/Speciel:Search
 
Disallow: /SacredWiki/Speciel%3ASearch
 
Disallow: /SacredWiki/Speciale:Search
 
Disallow: /SacredWiki/Speciale%3ASearch
 
Disallow: /SacredWiki/Istimewa:Search
 
Disallow: /SacredWiki/Istimewa%3ASearch
 
Disallow: /SacredWiki/Toiminnot:Search
 
Disallow: /SacredWiki/Toiminnot%3ASearch
 
#
 
# ar:
 
Disallow: /SacredWiki/%D8%AE%D8%A7%D8%B5:Search
 
Disallow: /SacredWiki/%D8%AE%D8%A7%D8%B5%3ASearch
 
 
User-agent: Slurp
 
Crawl-delay: 60
 

Latest revision as of 17:53, 14 August 2013