diff --git a/code/robots.py b/code/robots.py index 86ea413..8a06b55 100755 --- a/code/robots.py +++ b/code/robots.py @@ -30,6 +30,7 @@ def updated_robots_json(soup): """Update AI scraper information with data from darkvisitors.""" existing_content = load_robots_json() to_include = [ + "AI Agents", "AI Assistants", "AI Data Scrapers", "AI Search Crawlers", diff --git a/robots.json b/robots.json index eff38ac..ea930d8 100644 --- a/robots.json +++ b/robots.json @@ -69,13 +69,6 @@ "frequency": "Only when prompted by a user.", "description": "Used by plugins in ChatGPT to answer queries based on user input." }, - "Claude-Web": { - "operator": "[Anthropic](https://www.anthropic.com)", - "respect": "Unclear at this time.", - "function": "Scrapes data to train Anthropic's AI products.", - "frequency": "No information provided.", - "description": "Scrapes data to train LLMs and AI products offered by Anthropic." - }, "ClaudeBot": { "operator": "[Anthropic](https://www.anthropic.com)", "respect": "[Yes](https://support.anthropic.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler)", @@ -83,6 +76,20 @@ "frequency": "No information provided.", "description": "Scrapes data to train LLMs and AI products offered by Anthropic." }, + "Claude-User": { + "operator": "[Anthropic](https://www.anthropic.com)", + "respect": "Unclear at this time.", + "function": "Claude-User supports Claude AI users. When individuals ask questions to Claude, it may access websites using a Claude-User agent.", + "frequency": "No information provided.", + "description": "Claude-User supports Claude AI users. When individuals ask questions to Claude, it may access websites using a Claude-User agent." + }, + "Claude-SearchBot": { + "operator": "[Anthropic](https://www.anthropic.com)", + "respect": "Unclear at this time.", + "function": "Claude-SearchBot navigates the web to improve search result quality for users. It analyzes online content specifically to enhance the relevance and accuracy of search responses.", + "frequency": "No information provided.", + "description": "Claude-SearchBot navigates the web to improve search result quality for users. It analyzes online content specifically to enhance the relevance and accuracy of search responses." + }, "cohere-ai": { "operator": "[Cohere](https://cohere.com)", "respect": "Unclear at this time.", @@ -230,6 +237,13 @@ "frequency": "Unclear at this time.", "description": "Meta-ExternalFetcher is dispatched by Meta AI products in response to user prompts, when they need to fetch an individual links. More info can be found at https://darkvisitors.com/agents/agents/meta-externalfetcher" }, + "NovaAct": { + "operator": "Unclear at this time.", + "respect": "Unclear at this time.", + "function": "AI Agents", + "frequency": "Unclear at this time.", + "description": "Nova Act is an AI agent created by Amazon that can use a web browser. It can intelligently navigate and interact with websites to complete multi-step tasks on behalf of a human user. More info can be found at https://darkvisitors.com/agents/agents/novaact" + }, "OAI-SearchBot": { "operator": "[OpenAI](https://openai.com)", "respect": "[Yes](https://platform.openai.com/docs/bots)", @@ -251,6 +265,13 @@ "operator": "[Webz.io](https://webz.io/)", "respect": "[Yes](https://web.archive.org/web/20170704003301/http://omgili.com/Crawler.html)" }, + "Operator": { + "operator": "Unclear at this time.", + "respect": "Unclear at this time.", + "function": "AI Agents", + "frequency": "Unclear at this time.", + "description": "Operator is an AI agent created by OpenAI that can use a web browser. It can intelligently navigate and interact with websites to complete multi-step tasks on behalf of a human user. More info can be found at https://darkvisitors.com/agents/agents/operator" + }, "PanguBot": { "operator": "the Chinese company Huawei", "respect": "Unclear at this time.", @@ -335,4 +356,4 @@ "frequency": "No information.", "description": "Retrieves data used for You.com web search engine and LLMs." } -} \ No newline at end of file +} diff --git a/robots.txt b/robots.txt index de25a56..c029fb6 100644 --- a/robots.txt +++ b/robots.txt @@ -8,8 +8,9 @@ User-agent: Brightbot 1.0 User-agent: Bytespider User-agent: CCBot User-agent: ChatGPT-User -User-agent: Claude-Web User-agent: ClaudeBot +User-agent: Claude-User +User-agent: Claude-SearchBot User-agent: cohere-ai User-agent: cohere-training-data-crawler User-agent: Crawlspace