mirror of
https://github.com/ai-robots-txt/ai.robots.txt.git
synced 2025-06-19 10:00:52 +00:00
Compare commits
5 commits
75deb2cef7
...
e9d645098d
Author | SHA1 | Date | |
---|---|---|---|
![]() |
e9d645098d | ||
![]() |
a96e330989 | ||
156e6baa09 | |||
![]() |
d9f882a9b2 | ||
![]() |
d79ca19f38 |
2 changed files with 22 additions and 0 deletions
|
@@ -30,6 +30,7 @@ def updated_robots_json(soup):
|
||||||
"""Update AI scraper information with data from darkvisitors."""
|
"""Update AI scraper information with data from darkvisitors."""
|
||||||
existing_content = load_robots_json()
|
existing_content = load_robots_json()
|
||||||
to_include = [
|
to_include = [
|
||||||
|
"AI Agents",
|
||||||
"AI Assistants",
|
"AI Assistants",
|
||||||
"AI Data Scrapers",
|
"AI Data Scrapers",
|
||||||
"AI Search Crawlers",
|
"AI Search Crawlers",
|
||||||
|
|
21
robots.json
21
robots.json
|
@@ -216,6 +216,13 @@
|
||||||
"frequency": "Unclear at this time.",
|
"frequency": "Unclear at this time.",
|
||||||
"description": "Kangaroo Bot is used by the company Kangaroo LLM to download data to train AI models tailored to Australian language and culture. More info can be found at https://darkvisitors.com/agents/agents/kangaroo-bot"
|
"description": "Kangaroo Bot is used by the company Kangaroo LLM to download data to train AI models tailored to Australian language and culture. More info can be found at https://darkvisitors.com/agents/agents/kangaroo-bot"
|
||||||
},
|
},
|
||||||
|
"Lightpanda": {
|
||||||
|
"operator": "Unclear at this time.",
|
||||||
|
"respect": "Unclear at this time.",
|
||||||
|
"function": "AI Data Scraper",
|
||||||
|
"frequency": "Unclear at this time.",
|
||||||
|
"description": "Lightpanda is a headless browser intended for 'AI agents, LLM training, scraping and testing': https://github.com/lightpanda-io/browser"
|
||||||
|
},
|
||||||
"Meta-ExternalAgent": {
|
"Meta-ExternalAgent": {
|
||||||
"operator": "[Meta](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers)",
|
"operator": "[Meta](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers)",
|
||||||
"respect": "Yes.",
|
"respect": "Yes.",
|
||||||
|
@@ -230,6 +237,13 @@
|
||||||
"frequency": "Unclear at this time.",
|
"frequency": "Unclear at this time.",
|
||||||
"description": "Meta-ExternalFetcher is dispatched by Meta AI products in response to user prompts, when they need to fetch individual links. More info can be found at https://darkvisitors.com/agents/agents/meta-externalfetcher"
|
"description": "Meta-ExternalFetcher is dispatched by Meta AI products in response to user prompts, when they need to fetch individual links. More info can be found at https://darkvisitors.com/agents/agents/meta-externalfetcher"
|
||||||
},
|
},
|
||||||
|
"NovaAct": {
|
||||||
|
"operator": "Unclear at this time.",
|
||||||
|
"respect": "Unclear at this time.",
|
||||||
|
"function": "AI Agents",
|
||||||
|
"frequency": "Unclear at this time.",
|
||||||
|
"description": "Nova Act is an AI agent created by Amazon that can use a web browser. It can intelligently navigate and interact with websites to complete multi-step tasks on behalf of a human user. More info can be found at https://darkvisitors.com/agents/agents/novaact"
|
||||||
|
},
|
||||||
"OAI-SearchBot": {
|
"OAI-SearchBot": {
|
||||||
"operator": "[OpenAI](https://openai.com)",
|
"operator": "[OpenAI](https://openai.com)",
|
||||||
"respect": "[Yes](https://platform.openai.com/docs/bots)",
|
"respect": "[Yes](https://platform.openai.com/docs/bots)",
|
||||||
|
@@ -251,6 +265,13 @@
|
||||||
"operator": "[Webz.io](https://webz.io/)",
|
"operator": "[Webz.io](https://webz.io/)",
|
||||||
"respect": "[Yes](https://web.archive.org/web/20170704003301/http://omgili.com/Crawler.html)"
|
"respect": "[Yes](https://web.archive.org/web/20170704003301/http://omgili.com/Crawler.html)"
|
||||||
},
|
},
|
||||||
|
"Operator": {
|
||||||
|
"operator": "Unclear at this time.",
|
||||||
|
"respect": "Unclear at this time.",
|
||||||
|
"function": "AI Agents",
|
||||||
|
"frequency": "Unclear at this time.",
|
||||||
|
"description": "Operator is an AI agent created by OpenAI that can use a web browser. It can intelligently navigate and interact with websites to complete multi-step tasks on behalf of a human user. More info can be found at https://darkvisitors.com/agents/agents/operator"
|
||||||
|
},
|
||||||
"PanguBot": {
|
"PanguBot": {
|
||||||
"operator": "the Chinese company Huawei",
|
"operator": "the Chinese company Huawei",
|
||||||
"respect": "Unclear at this time.",
|
"respect": "Unclear at this time.",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue