# Exemple robots.txt

Exemple de fichier robots.txt

[http://www.arnold-soft.de/robots.txt](http://www.arnold-soft.de/robots.txt)

```
# BotDoku: de.wetena.com/bot
# Doku:    www.robotstxt.org

# erlaubte robots
# google.com Googlebot
# bing.com bingbot
# msn.com
# MSIE
# info@netcraft.com

# Hier eine Liste der unerwuenschten robots
# Yandex
# www.infohelfer.de
# warebay.com
# thunderstone.com
# pixray.com
# aihit.com
# ips-agent
# MALC

# metadatalabs.com
User-agent: MLBot
Disallow: /

#  Ahrefs.com (http://ahrefs.com/robot/)
#  IP 5.10.83.36
#  "Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/)"
user-agent: AhrefsBot
disallow: /

# ezooms.bot
User-agent: Ezooms
Disallow: /

# domaintools.com
User-agent: SurveyBot
Disallow: /

# www.infohelfer.de
User-agent: Infohelfer
Disallow: /

# www.pixray.com
User-agent: Pixray
Disallow: /

# warebay.com
User-agent: WBSearchBot
Disallow: /

# aihit.com
User-agent: aiHitBot
Disallow: /

# yandex.com  YandexBot YandexImages
# IP 141.8.147.17
# "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)"
User-agent: YandexBot
Disallow: /
User-agent: YandexImages
Disallow: /

# U 
User-agent: U
Disallow: /

# unister.de
User-agent: UnisterBot
Disallow: /

# www.Nutch.de
# IP 62.146.2.234, 117.78.13.18
# "Domnutch-Bot/Nutch-1.0 (Domnutch; http://www.Nutch.de/)"
User-agent: nutch-1.4
Disallow: /

User-agent: discobot
Disallow: /

# SEO Spider  spider@spiderlytics.com 
# IP 5.199.136.130
# "Mozilla/5.0 (compatible; Spiderlytics/1.0; +spider@spiderlytics.com)"
User-agent: Spiderlytics
Disallow: /

# Unknown
# IP 207.241.226.239
# "ia_archiver(OS-Wayback)"
User-agent: ia_archiver
Disallow: /

# crawler@alexa.com
# IP 204.236.235.245
# "ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)"
User-agent: alexa
Disallow: /

# Unknown
# IP 108.59.8.70
# "Mozilla/5.0 (compatible; MJ12bot/v1.4.4; http://www.majestic12.co.uk/bot.php?+)"
User-agent: MJ12bot
Disallow: /

# http://go.mail.ru/help/robots
# IP 217.69.133.253
# "Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/2.0; +http://go.mail.ru/help/robots)"
User-agent: Mail.RU_Bot
Disallow: /

# macinroy.com
# IP 85.25.137.24
# "MacInroy Privacy Auditors. See jarnold.org's privacy violation report: http://jarnold.org.macinroy.com/jarnold.org"
User-agent: MacInroy
Disallow: /

# www.semrush.com/bot.html
# IP 46.229.164.102
# "Mozilla/5.0 (compatible; SemrushBot/0.97; +http://www.semrush.com/bot.html)"
User-agent: SemrushBot
Disallow: /

# http://www.icjobs.de
# IP 85.25.71.40
# "Mozilla/5.0 (X11; U; Linux i686; de; rv:1.9.0.1; compatible; iCjobs Stellenangebote Jobs; http://www.icjobs.de) Gecko/20100401 iCjobs/3.2.3"
User-agent: iCjobs
Disallow: /

# http://fulltext.sblog.cz
# IP 77.75.77.32
# "SeznamBot/3.0 (+http://fulltext.sblog.cz/)"
User-agent: SeznamBot
Disallow: /

# http://webmeup-crawler.com
# IP 108.178.53.146
# "Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)"
User-agent: BLEXBot
Disallow: /

# http://siteexplorer.info
# IP 208.43.225.84
# "Mozilla/5.0 (compatible; SiteExplorer/1.0b; +http://siteexplorer.info/)"
User-agent: SiteExplorer
Disallow: /

# www.linkdex.com/about/bots
# IP 54.242.123.170, 23.22.229.75, 54.225.52.217, 23.20.126.233
# "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/about/bots/)"
User-agent: linkdexbot
Disallow: /

# www.wotbox.com/bot
# IP 81.144.138.34
# "Wotbox/2.01 (+http://www.wotbox.com/bot/)"
User-agent: Wotbox
Disallow: /

# http://www.domaintuno.com
# IP 192.96.204.42
# "http://www.domaintuno.com/whois/jarnold.org" "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"
User-agent: domaintuno
Disallow: /

# unknown addressendeutschland.de
# IP 86.109.249.174
# "http://arnold-soft.de/" "dubaiindex (addressendeutschland.de)"
User-agent: dubaiindex
Disallow: /

# www.pagesinvenotry.com
# IP 130.185.109.243
# "PagesInventory (robot http://www.pagesinvenotry.com)"
User-agent: PagesInventory
Disallow: /

# www.abonti.com
# IP 77.233.225.115
# "Mozilla/5.0 (compatible; Abonti/0.91 - http://www.abonti.com)"
User-agent: Abonti
Disallow: /

# www.backlinktest.com/crawler.html
# IP 46.4.100.231
# "BacklinkCrawler (http://www.backlinktest.com/crawler.html)"
User-agent: BacklinkCrawler
Disallow: /

# http://netcomber.com
# IP 54.227.175.17
# "NCBot http://netcomber.com?st=ba2Tool for finding all their domain names."
User-agent: NCBot
Disallow: /

# Unknown
# IP 69.58.178.58
# "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:14.0; ips-agent) Gecko/20100101 Firefox/14.0.1"
User-agent: ips-agent
Disallow: /

# www.grapeshot.co.uk/crawler.php
# IP 89.145.95.2
# "Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)"
User-agent: GrapeshotCrawler
Disallow: /

# www.80legs.com/webcrawler.html
# IP 64.125.222.16
# "Mozilla/5.0 (compatible; 008/0.83; http://www.80legs.com/webcrawler.html;) Gecko/2008032620"
User-agent: 008
Disallow: /

# it2media.de
# IP 86.109.249.169
# "it2media-domain-crawler/1.0 on crawler-prod.it2media.de"
User-agent: it2media-domain-crawler
Disallow: /

# http://crawler.sistrix.net
# IP 176.9.148.197, 176.9.155.226, 5.9.112.66
# "Mozilla/5.0 (compatible; SISTRIX Crawler; http://crawler.sistrix.net/)"
User-agent: SISTRIX
Disallow: /

# www.picsearch.com/bot.html
# IP 217.212.224.183
# "psbot/0.1 (+http://www.picsearch.com/bot.html)"
User-agent: psbot
Disallow: /

# worio.com
# IP 107.22.250.59
# "Mozilla/5.0 (compatible; woriobot +http://worio.com)"
User-agent: woriobot
Disallow: /

# semantissimo.de
# IP 88.198.24.173
# "ssearch_bot (sSearch Crawler; http://www.semantissimo.de)"
User-agent: sSearch
Disallow: /

# www.archive.org/details/archive.org_bot
# IP 207.241.237.102  + .103 (abwechselnd!) + 207.241.226.234
# "Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)"
User-agent: archive.org_bot
Disallow: /

# +spider@waybackarchive.org
# IP 5.199.136.130
# "Mozilla/5.0 (compatible; waybackarchive.org/1.0; +spider@waybackarchive.org)"
User-agent: waybackarchive.org
Disallow: /

# www.website-datenbank.de
# IP 81.209.177.145
# "netEstate NE Crawler (+http://www.website-datenbank.de/)"
User-agent: netEstate
Disallow: /

# www.compspy.com/spider.html
# IP 68.47.129.55
# "Mozilla/5.0 (compatible; CompSpyBot/1.0; +http://www.compspy.com/spider.html)"
User-agent: CompSpyBot
Disallow: /

# www.seoprofiler.com/bot
# IP 198.199.89.149, 162.243.203.202
# "Mozilla/5.0 (compatible; spbot/4.1.0; +http://OpenLinkProfiler.org/bot )"
User-agent: spbot
Disallow: /

# http://filterdb.iss.net/crawler/
# IP 206.253.226.18
# "Mozilla/5.0 (compatible; oBot/2.3.1; http://filterdb.iss.net/crawler/)"
User-agent: oBot
Disallow: /

# http://www.baidu.com
# IP 183.60.243.187
# "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0"
User-agent: baidu
Disallow: /

# http://www.exabot.com/go/robot
# IP 178.255.215.69
# "Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)"
User-agent: Exabot
Disallow: /

# www.tiscali.it
# IP 217.73.208.103
# "Mozilla/5.0 (compatible; IstellaBot/1.18.81 +http://www.tiscali.it/)"
User-agent: IstellaBot
Disallow: /

# www.netseer.com/crawler.html
# IP 75.98.9.250
# "Mozilla/5.0 (compatible; NetSeer crawler/2.0; +http://www.netseer.com/crawler.html; crawler@netseer.com)"
User-agent: NetSeer
Disallow: /

# http://www.opensiteexplorer.org/dotbot, help@moz.com
# IP 208.115.113.92
# "Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)"
User-agent: DotBot
Disallow: /

# http://www.proximic.com/info/spider.php
# IP 54.211.1.18
# "Mozilla/5.0 (compatible; proximic; +http://www.proximic.com/info/spider.php)"
User-agent: proximic
Disallow: /

# http://commoncrawl.org/faq/
# IP 54.227.12.4
# "CCBot/2.0 (http://commoncrawl.org/faq/)"
User-agent: CCBot
Disallow: /

# 
# IP 130.211.186.147, 146.148.35.52
# "GET / HTTP/1.0" 200 10064 "-" "NerdyBot"
User-agent: NerdyBot
Disallow: /

# http://semalt.semalt.com/crawler.php
# IP 187.79.214.121
# "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36"
#User-agent: xxx
#Disallow: /

#
# IP 69.84.207.246
# "LSSRocketCrawler/1.0 LightspeedSystems"
User-agent: LSSRocketCrawler
Disallow: /

# ???
# 50.17.21.141
# "Cliqzbot"
User-agent: Cliqzbot
Disallow: /

User-agent: Mediapartners-Google
Disallow: /

# standard Einstellungen
User-agent: *
Disallow: /atd/
Disallow: /backup/
Disallow: /files/
Disallow: /log/
Disallow: /phptmp/
Disallow: /restore/
Disallow: /html/_media/
Disallow: /html/media/images/
Disallow: /html/media/Scripting/
Disallow: /html/cgi-bin/
Disallow: /html/mediawiki/

# Allow: /html/
# Allow: /html/media/files/
```