Mirror of https://github.com/ai-robots-txt/ai.robots.txt.git (synced 2025-06-19 10:00:52 +00:00)
Compare commits: 6 commits, b73532abb1...ed25a8a2ad
| Author | SHA1 | Date | |
|---|---|---|---|
| | ed25a8a2ad | | |
| | a96e330989 | | |
| | 156e6baa09 | | |
| | d9f882a9b2 | | |
| | 8dc36aa2e2 | | |
| | ae8f74c10c | | |
3 changed files with 32 additions and 9 deletions
|
@ -30,6 +30,7 @@ def updated_robots_json(soup):
|
||||||
"""Update AI scraper information with data from darkvisitors."""
|
"""Update AI scraper information with data from darkvisitors."""
|
||||||
existing_content = load_robots_json()
|
existing_content = load_robots_json()
|
||||||
to_include = [
|
to_include = [
|
||||||
|
"AI Agents",
|
||||||
"AI Assistants",
|
"AI Assistants",
|
||||||
"AI Data Scrapers",
|
"AI Data Scrapers",
|
||||||
"AI Search Crawlers",
|
"AI Search Crawlers",
|
||||||
|
|
37
robots.json
37
robots.json
|
@ -69,13 +69,6 @@
|
||||||
"frequency": "Only when prompted by a user.",
|
"frequency": "Only when prompted by a user.",
|
||||||
"description": "Used by plugins in ChatGPT to answer queries based on user input."
|
"description": "Used by plugins in ChatGPT to answer queries based on user input."
|
||||||
},
|
},
|
||||||
"Claude-Web": {
|
|
||||||
"operator": "[Anthropic](https://www.anthropic.com)",
|
|
||||||
"respect": "Unclear at this time.",
|
|
||||||
"function": "Scrapes data to train Anthropic's AI products.",
|
|
||||||
"frequency": "No information provided.",
|
|
||||||
"description": "Scrapes data to train LLMs and AI products offered by Anthropic."
|
|
||||||
},
|
|
||||||
"ClaudeBot": {
|
"ClaudeBot": {
|
||||||
"operator": "[Anthropic](https://www.anthropic.com)",
|
"operator": "[Anthropic](https://www.anthropic.com)",
|
||||||
"respect": "[Yes](https://support.anthropic.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler)",
|
"respect": "[Yes](https://support.anthropic.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler)",
|
||||||
|
@ -83,6 +76,20 @@
|
||||||
"frequency": "No information provided.",
|
"frequency": "No information provided.",
|
||||||
"description": "Scrapes data to train LLMs and AI products offered by Anthropic."
|
"description": "Scrapes data to train LLMs and AI products offered by Anthropic."
|
||||||
},
|
},
|
||||||
|
"Claude-User": {
|
||||||
|
"operator": "[Anthropic](https://www.anthropic.com)",
|
||||||
|
"respect": "Unclear at this time.",
|
||||||
|
"function": "Claude-User supports Claude AI users. When individuals ask questions to Claude, it may access websites using a Claude-User agent.",
|
||||||
|
"frequency": "No information provided.",
|
||||||
|
"description": "Claude-User supports Claude AI users. When individuals ask questions to Claude, it may access websites using a Claude-User agent."
|
||||||
|
},
|
||||||
|
"Claude-SearchBot": {
|
||||||
|
"operator": "[Anthropic](https://www.anthropic.com)",
|
||||||
|
"respect": "Unclear at this time.",
|
||||||
|
"function": "Claude-SearchBot navigates the web to improve search result quality for users. It analyzes online content specifically to enhance the relevance and accuracy of search responses.",
|
||||||
|
"frequency": "No information provided.",
|
||||||
|
"description": "Claude-SearchBot navigates the web to improve search result quality for users. It analyzes online content specifically to enhance the relevance and accuracy of search responses."
|
||||||
|
},
|
||||||
"cohere-ai": {
|
"cohere-ai": {
|
||||||
"operator": "[Cohere](https://cohere.com)",
|
"operator": "[Cohere](https://cohere.com)",
|
||||||
"respect": "Unclear at this time.",
|
"respect": "Unclear at this time.",
|
||||||
|
@ -230,6 +237,13 @@
|
||||||
"frequency": "Unclear at this time.",
|
"frequency": "Unclear at this time.",
|
||||||
"description": "Meta-ExternalFetcher is dispatched by Meta AI products in response to user prompts, when they need to fetch an individual links. More info can be found at https://darkvisitors.com/agents/agents/meta-externalfetcher"
|
"description": "Meta-ExternalFetcher is dispatched by Meta AI products in response to user prompts, when they need to fetch an individual links. More info can be found at https://darkvisitors.com/agents/agents/meta-externalfetcher"
|
||||||
},
|
},
|
||||||
|
"NovaAct": {
|
||||||
|
"operator": "Unclear at this time.",
|
||||||
|
"respect": "Unclear at this time.",
|
||||||
|
"function": "AI Agents",
|
||||||
|
"frequency": "Unclear at this time.",
|
||||||
|
"description": "Nova Act is an AI agent created by Amazon that can use a web browser. It can intelligently navigate and interact with websites to complete multi-step tasks on behalf of a human user. More info can be found at https://darkvisitors.com/agents/agents/novaact"
|
||||||
|
},
|
||||||
"OAI-SearchBot": {
|
"OAI-SearchBot": {
|
||||||
"operator": "[OpenAI](https://openai.com)",
|
"operator": "[OpenAI](https://openai.com)",
|
||||||
"respect": "[Yes](https://platform.openai.com/docs/bots)",
|
"respect": "[Yes](https://platform.openai.com/docs/bots)",
|
||||||
|
@ -251,6 +265,13 @@
|
||||||
"operator": "[Webz.io](https://webz.io/)",
|
"operator": "[Webz.io](https://webz.io/)",
|
||||||
"respect": "[Yes](https://web.archive.org/web/20170704003301/http://omgili.com/Crawler.html)"
|
"respect": "[Yes](https://web.archive.org/web/20170704003301/http://omgili.com/Crawler.html)"
|
||||||
},
|
},
|
||||||
|
"Operator": {
|
||||||
|
"operator": "Unclear at this time.",
|
||||||
|
"respect": "Unclear at this time.",
|
||||||
|
"function": "AI Agents",
|
||||||
|
"frequency": "Unclear at this time.",
|
||||||
|
"description": "Operator is an AI agent created by OpenAI that can use a web browser. It can intelligently navigate and interact with websites to complete multi-step tasks on behalf of a human user. More info can be found at https://darkvisitors.com/agents/agents/operator"
|
||||||
|
},
|
||||||
"PanguBot": {
|
"PanguBot": {
|
||||||
"operator": "the Chinese company Huawei",
|
"operator": "the Chinese company Huawei",
|
||||||
"respect": "Unclear at this time.",
|
"respect": "Unclear at this time.",
|
||||||
|
@ -335,4 +356,4 @@
|
||||||
"frequency": "No information.",
|
"frequency": "No information.",
|
||||||
"description": "Retrieves data used for You.com web search engine and LLMs."
|
"description": "Retrieves data used for You.com web search engine and LLMs."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,8 +8,9 @@ User-agent: Brightbot 1.0
|
||||||
User-agent: Bytespider
|
User-agent: Bytespider
|
||||||
User-agent: CCBot
|
User-agent: CCBot
|
||||||
User-agent: ChatGPT-User
|
User-agent: ChatGPT-User
|
||||||
User-agent: Claude-Web
|
|
||||||
User-agent: ClaudeBot
|
User-agent: ClaudeBot
|
||||||
|
User-agent: Claude-User
|
||||||
|
User-agent: Claude-SearchBot
|
||||||
User-agent: cohere-ai
|
User-agent: cohere-ai
|
||||||
User-agent: cohere-training-data-crawler
|
User-agent: cohere-training-data-crawler
|
||||||
User-agent: Crawlspace
|
User-agent: Crawlspace
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue