Compare commits

...

6 commits

Author SHA1 Message Date
Kyle Buckingham
ed25a8a2ad
Merge 8dc36aa2e2 into a96e330989 2025-04-15 12:41:53 +02:00
dark-visitors
a96e330989 Update from Dark Visitors
Some checks are pending
/ run-tests (push) Waiting to run
2025-04-15 00:57:01 +00:00
156e6baa09
Merge pull request #105 from jsheard/patch-1
Some checks are pending
/ run-tests (push) Waiting to run
Include "AI Agents" from Dark Visitors
2025-04-14 10:08:38 -07:00
Joshua Sheard
d9f882a9b2
Include "AI Agents" from Dark Visitors 2025-04-14 15:46:01 +01:00
Kyle Buckingham
8dc36aa2e2
Update robots.txt 2025-04-01 15:23:28 -07:00
Kyle Buckingham
ae8f74c10c
Update robots.json 2025-04-01 15:22:04 -07:00
3 changed files with 32 additions and 9 deletions

View file

@ -30,6 +30,7 @@ def updated_robots_json(soup):
"""Update AI scraper information with data from darkvisitors."""
existing_content = load_robots_json()
to_include = [
"AI Agents",
"AI Assistants",
"AI Data Scrapers",
"AI Search Crawlers",

View file

@ -69,13 +69,6 @@
"frequency": "Only when prompted by a user.",
"description": "Used by plugins in ChatGPT to answer queries based on user input."
},
"Claude-Web": {
"operator": "[Anthropic](https://www.anthropic.com)",
"respect": "Unclear at this time.",
"function": "Scrapes data to train Anthropic's AI products.",
"frequency": "No information provided.",
"description": "Scrapes data to train LLMs and AI products offered by Anthropic."
},
"ClaudeBot": {
"operator": "[Anthropic](https://www.anthropic.com)",
"respect": "[Yes](https://support.anthropic.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler)",
@ -83,6 +76,20 @@
"frequency": "No information provided.",
"description": "Scrapes data to train LLMs and AI products offered by Anthropic."
},
"Claude-User": {
"operator": "[Anthropic](https://www.anthropic.com)",
"respect": "Unclear at this time.",
"function": "Claude-User supports Claude AI users. When individuals ask questions to Claude, it may access websites using a Claude-User agent.",
"frequency": "No information provided.",
"description": "Claude-User supports Claude AI users. When individuals ask questions to Claude, it may access websites using a Claude-User agent."
},
"Claude-SearchBot": {
"operator": "[Anthropic](https://www.anthropic.com)",
"respect": "Unclear at this time.",
"function": "Claude-SearchBot navigates the web to improve search result quality for users. It analyzes online content specifically to enhance the relevance and accuracy of search responses.",
"frequency": "No information provided.",
"description": "Claude-SearchBot navigates the web to improve search result quality for users. It analyzes online content specifically to enhance the relevance and accuracy of search responses."
},
"cohere-ai": {
"operator": "[Cohere](https://cohere.com)",
"respect": "Unclear at this time.",
@ -230,6 +237,13 @@
"frequency": "Unclear at this time.",
"description": "Meta-ExternalFetcher is dispatched by Meta AI products in response to user prompts, when they need to fetch individual links. More info can be found at https://darkvisitors.com/agents/agents/meta-externalfetcher"
},
"NovaAct": {
"operator": "[Amazon](https://www.amazon.com)",
"respect": "Unclear at this time.",
"function": "AI Agents",
"frequency": "Unclear at this time.",
"description": "Nova Act is an AI agent created by Amazon that can use a web browser. It can intelligently navigate and interact with websites to complete multi-step tasks on behalf of a human user. More info can be found at https://darkvisitors.com/agents/agents/novaact"
},
"OAI-SearchBot": {
"operator": "[OpenAI](https://openai.com)",
"respect": "[Yes](https://platform.openai.com/docs/bots)",
@ -251,6 +265,13 @@
"operator": "[Webz.io](https://webz.io/)",
"respect": "[Yes](https://web.archive.org/web/20170704003301/http://omgili.com/Crawler.html)"
},
"Operator": {
"operator": "[OpenAI](https://openai.com)",
"respect": "Unclear at this time.",
"function": "AI Agents",
"frequency": "Unclear at this time.",
"description": "Operator is an AI agent created by OpenAI that can use a web browser. It can intelligently navigate and interact with websites to complete multi-step tasks on behalf of a human user. More info can be found at https://darkvisitors.com/agents/agents/operator"
},
"PanguBot": {
"operator": "the Chinese company Huawei",
"respect": "Unclear at this time.",

View file

@ -8,8 +8,9 @@ User-agent: Brightbot 1.0
User-agent: Bytespider
User-agent: CCBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: Claude-User
User-agent: Claude-SearchBot
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
User-agent: Crawlspace