From d79ca19f38f2bdcfb0f4b61c1f58c3d6488cf74d Mon Sep 17 00:00:00 2001 From: Katrin Leinweber Date: Thu, 27 Mar 2025 17:59:09 +0100 Subject: [PATCH 1/3] Add Lightpanda due to its AI/LLM focus https://github.com/lightpanda-io/browser --- robots.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/robots.json b/robots.json index cdc7bb5..a084dbe 100644 --- a/robots.json +++ b/robots.json @@ -209,6 +209,13 @@ "frequency": "Unclear at this time.", "description": "Kangaroo Bot is used by the company Kangaroo LLM to download data to train AI models tailored to Australian language and culture. More info can be found at https://darkvisitors.com/agents/agents/kangaroo-bot" }, + "Lightpanda": { + "operator": "Unclear at this time.", + "respect": "Unclear at this time.", + "function": "AI Data Scraper", + "frequency": "Unclear at this time.", + "description": "Lightpanda is a headless browser intended for 'AI agents, LLM training, scraping and testing': https://github.com/lightpanda-io/browser" + }, "Meta-ExternalAgent": { "operator": "[Meta](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers)", "respect": "Yes.", From d9f882a9b21170754c4b37ff1bbc237171876684 Mon Sep 17 00:00:00 2001 From: Joshua Sheard Date: Mon, 14 Apr 2025 15:46:01 +0100 Subject: [PATCH 2/3] Include "AI Agents" from Dark Visitors --- code/robots.py | 1 + 1 file changed, 1 insertion(+) diff --git a/code/robots.py b/code/robots.py index 86ea413..8a06b55 100755 --- a/code/robots.py +++ b/code/robots.py @@ -30,6 +30,7 @@ def updated_robots_json(soup): """Update AI scraper information with data from darkvisitors.""" existing_content = load_robots_json() to_include = [ + "AI Agents", "AI Assistants", "AI Data Scrapers", "AI Search Crawlers", From a96e33098975edf1c05c8d9684b36b9fa31f7ef2 Mon Sep 17 00:00:00 2001 From: dark-visitors Date: Tue, 15 Apr 2025 00:57:01 +0000 Subject: [PATCH 3/3] Update from Dark Visitors --- robots.json | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/robots.json b/robots.json index eff38ac..8bba6b2 100644 --- a/robots.json +++ b/robots.json @@ -230,6 +230,13 @@ "frequency": "Unclear at this time.", "description": "Meta-ExternalFetcher is dispatched by Meta AI products in response to user prompts, when they need to fetch an individual links. More info can be found at https://darkvisitors.com/agents/agents/meta-externalfetcher" }, + "NovaAct": { + "operator": "Unclear at this time.", + "respect": "Unclear at this time.", + "function": "AI Agents", + "frequency": "Unclear at this time.", + "description": "Nova Act is an AI agent created by Amazon that can use a web browser. It can intelligently navigate and interact with websites to complete multi-step tasks on behalf of a human user. More info can be found at https://darkvisitors.com/agents/agents/novaact" + }, "OAI-SearchBot": { "operator": "[OpenAI](https://openai.com)", "respect": "[Yes](https://platform.openai.com/docs/bots)", @@ -251,6 +258,13 @@ "operator": "[Webz.io](https://webz.io/)", "respect": "[Yes](https://web.archive.org/web/20170704003301/http://omgili.com/Crawler.html)" }, + "Operator": { + "operator": "Unclear at this time.", + "respect": "Unclear at this time.", + "function": "AI Agents", + "frequency": "Unclear at this time.", + "description": "Operator is an AI agent created by OpenAI that can use a web browser. It can intelligently navigate and interact with websites to complete multi-step tasks on behalf of a human user. More info can be found at https://darkvisitors.com/agents/agents/operator" + }, "PanguBot": { "operator": "the Chinese company Huawei", "respect": "Unclear at this time.",