diff --git a/code/robots.py b/code/robots.py index 86ea413..8a06b55 100755 --- a/code/robots.py +++ b/code/robots.py @@ -30,6 +30,7 @@ def updated_robots_json(soup): """Update AI scraper information with data from darkvisitors.""" existing_content = load_robots_json() to_include = [ + "AI Agents", "AI Assistants", "AI Data Scrapers", "AI Search Crawlers", diff --git a/robots.json b/robots.json index eff38ac..de1dc36 100644 --- a/robots.json +++ b/robots.json @@ -216,6 +216,13 @@ "frequency": "Unclear at this time.", "description": "Kangaroo Bot is used by the company Kangaroo LLM to download data to train AI models tailored to Australian language and culture. More info can be found at https://darkvisitors.com/agents/agents/kangaroo-bot" }, + "Lightpanda": { + "operator": "Unclear at this time.", + "respect": "Unclear at this time.", + "function": "AI Data Scrapers", + "frequency": "Unclear at this time.", + "description": "Lightpanda is a headless browser intended for 'AI agents, LLM training, scraping and testing': https://github.com/lightpanda-io/browser" + }, "Meta-ExternalAgent": { "operator": "[Meta](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers)", "respect": "Yes.", @@ -230,6 +237,13 @@ "frequency": "Unclear at this time.", "description": "Meta-ExternalFetcher is dispatched by Meta AI products in response to user prompts, when they need to fetch an individual links. More info can be found at https://darkvisitors.com/agents/agents/meta-externalfetcher" }, + "NovaAct": { + "operator": "Unclear at this time.", + "respect": "Unclear at this time.", + "function": "AI Agents", + "frequency": "Unclear at this time.", + "description": "Nova Act is an AI agent created by Amazon that can use a web browser. It can intelligently navigate and interact with websites to complete multi-step tasks on behalf of a human user. 
More info can be found at https://darkvisitors.com/agents/agents/novaact" + }, "OAI-SearchBot": { "operator": "[OpenAI](https://openai.com)", "respect": "[Yes](https://platform.openai.com/docs/bots)", @@ -251,6 +265,13 @@ "operator": "[Webz.io](https://webz.io/)", "respect": "[Yes](https://web.archive.org/web/20170704003301/http://omgili.com/Crawler.html)" }, + "Operator": { + "operator": "Unclear at this time.", + "respect": "Unclear at this time.", + "function": "AI Agents", + "frequency": "Unclear at this time.", + "description": "Operator is an AI agent created by OpenAI that can use a web browser. It can intelligently navigate and interact with websites to complete multi-step tasks on behalf of a human user. More info can be found at https://darkvisitors.com/agents/agents/operator" + }, "PanguBot": { "operator": "the Chinese company Huawei", "respect": "Unclear at this time.",