From d8dedef9f916cd3a487032b5d846864961d1d0a0 Mon Sep 17 00:00:00 2001
From: Cory Dransfeldt <coryd@hey.com>
Date: Sun, 14 Apr 2024 09:54:38 -0700
Subject: [PATCH] feat: block bad bots

---
 api/block-bots.js        |  53 ++++
 package.json             |   2 +-
 src/_data/json/robots.js | 658 +--------------------------------------
 3 files changed, 61 insertions(+), 652 deletions(-)
 create mode 100644 api/block-bots.js

diff --git a/api/block-bots.js b/api/block-bots.js
new file mode 100644
index 00000000..bbaeca79
--- /dev/null
+++ b/api/block-bots.js
@@ -0,0 +1,53 @@
+import { getStore } from '@netlify/blobs'
+import { DateTime } from 'luxon'
+
+// User-agent fragments to block; the handler matches them case-insensitively as substrings.
+const botUas = [
+  'AdsBot-Google',
+  'Amazonbot',
+  'anthropic-ai',
+  'Applebot',
+  'AwarioRssBot',
+  'AwarioSmartBot',
+  'Bytespider',
+  'CCBot',
+  'ChatGPT',
+  'ChatGPT-User',
+  'Claude-Web',
+  'ClaudeBot',
+  'cohere-ai',
+  'DataForSeoBot',
+  'Diffbot',
+  'FacebookBot',
+  'Google-Extended',
+  'GPTBot',
+  'ImagesiftBot',
+  'magpie-crawler',
+  'omgili',
+  'Omgilibot',
+  'peer39_crawler',
+  'PerplexityBot',
+  'YouBot'
+]
+
+// Request handler: answers requests from known bot user agents with a 401 and records
+// the hit in the 'bots' blob store; every other request continues via context.next().
+export default async (request, context) => {
+  // The header can be missing (get() returns null); '' keeps the match from throwing.
+  const ua = request.headers.get('user-agent') ?? ''
+  const lowerUa = ua.toLowerCase()
+  const isBot = botUas.some((bot) => lowerUa.includes(bot.toLowerCase()))
+
+  // Only open the blob store for bots so ordinary traffic skips the extra work.
+  if (!isBot) return context.next()
+
+  // Key: the raw user agent. Value: an explicit ISO-8601 string rather than a DateTime object.
+  await getStore('bots').set(ua, DateTime.now().toISO())
+
+  // Empty body; 401 is the status this handler has always used for blocked bots.
+  return new Response(null, { status: 401 })
+};
+
+export const config = {
+  path: '/*', // NOTE(review): wildcard path — presumably runs the handler on every route; confirm
+}
\ No newline at end of file
diff --git a/package.json b/package.json
index 56c7fd64..70093f76 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "coryd.dev",
-  "version": "10.1.2",
+  "version": "10.2.2",
   "description": "The source for my personal site. Built using 11ty.",
   "type": "module",
   "scripts": {
diff --git a/src/_data/json/robots.js b/src/_data/json/robots.js
index 7301ebff..a84c9ef0 100644
--- a/src/_data/json/robots.js
+++ b/src/_data/json/robots.js
@@ -1,675 +1,31 @@
 export default [
-  '01h4x.com',
-  '360Spider',
-  '404checker',
-  '404enemy',
-  '80legs',
-  'ADmantX',
-  'AIBOT',
-  'ALittle Client',
-  'ASPSeek',
-  'Abonti',
-  'Aboundex',
-  'Aboundexbot',
-  'Acunetix',
   'AdsBot-Google',
-  'AdsTxtCrawlerTP',
-  'AfD-Verbotsverfahren',
-  'AhrefsBot',
-  'AiHitBot',
-  'Aipbot',
-  'Alexibot',
-  'AllSubmitter',
-  'Alligator',
-  'AlphaBot',
   'Amazonbot',
-  'Anarchie',
-  'Anarchy',
-  'Anarchy99',
-  'Ankit',
-  'Anthill',
   'anthropic-ai',
-  'Apexoo',
   'Applebot',
-  'Aspiegel',
-  'Asterias',
-  'Atomseobot',
-  'Attach',
   'AwarioRssBot',
   'AwarioSmartBot',
-  'BBBike',
-  'BDCbot',
-  'BDFetch',
-  'BLEXBot',
-  'BackDoorBot',
-  'BackStreet',
-  'BackWeb',
-  'Backlink-Ceck',
-  'BacklinkCrawler',
-  'Badass',
-  'Bandit',
-  'Barkrowler',
-  'BatchFTP',
-  'Battleztar Bazinga',
-  'BetaBot',
-  'Bigfoot',
-  'Bitacle',
-  'BlackWidow',
-  'Black Hole',
-  'Blackboard',
-  'Blow',
-  'BlowFish',
-  'Boardreader',
-  'Bolt',
-  'BotALot',
-  'Brandprotect',
-  'Brandwatch',
-  'Buck',
-  'Buddy',
-  'BuiltBotTough',
-  'BuiltWith',
-  'Bullseye',
-  'BunnySlippers',
-  'BuzzSumo',
   'Bytespider',
-  'CATExplorador',
   'CCBot',
-  'CODE87',
-  'CSHttp',
-  'Calculon',
-  'CazoodleBot',
-  'Cegbfeieh',
-  'CensysInspect',
   'ChatGPT-User',
-  'CheTeam',
-  'CheeseBot',
-  'CherryPicker',
-  'ChinaClaw',
-  'Chlooe',
-  'Citoid',
-  'Claritybot',
   'ClaudeBot',
   'Claude-Web',
-  'Cliqzbot',
-  'Cloud mapping',
-  'Cocolyzebot',
-  'Cogentbot',
-  'Collector',
-  'Copier',
-  'CopyRightCheck',
-  'Copyscape',
-  'Cosmos',
-  'Craftbot',
-  'Crawling at Home Project',
-  'CrazyWebCrawler',
-  'Crescent',
-  'CrunchBot',
-  'Curious',
-  'Custo',
-  'CyotekWebCopy',
-  'DBLBot',
-  'DIIbot',
-  'DSearch',
-  'DTS Agent',
-  'DataCha0s',
-  'DatabaseDriverMysqli',
-  'Demon',
-  'Deusu',
-  'Devil',
-  'Diffbot',
-  'Digincore',
-  'DigitalPebble',
-  'Dirbuster',
-  'Disco',
-  'Discobot',
-  'Discoverybot',
-  'Dispatch',
-  'DittoSpyder',
-  'DnBCrawler-Analytics',
-  'DnyzBot',
-  'DomCopBot',
-  'DomainAppender',
-  'DomainCrawler',
-  'DomainSigmaCrawler',
-  'DomainStatsBot',
-  'Domains Project',
-  'Dotbot',
-  'Download Wonder',
-  'Dragonfly',
-  'Drip',
-  'ECCP/1.0',
-  'EMail Siphon',
-  'EMail Wolf',
-  'EasyDL',
-  'Ebingbong',
-  'Ecxi',
-  'EirGrabber',
-  'EroCrawler',
-  'Evil',
-  'Exabot',
-  'Express WebPictures',
-  'ExtLinksBot',
-  'Extractor',
-  'ExtractorPro',
-  'Extreme Picture Finder',
-  'EyeNetIE',
-  'Ezooms',
-  'FDM',
-  'FHscan',
+  'cohere-ai',
+  'DataForSeoBot',
   'FacebookBot',
-  'FemtosearchBot',
-  'Fimap',
-  'Firefox/7.0',
-  'FlashGet',
-  'Flunky',
-  'Foobot',
-  'Freeuploader',
   'FriendlyCrawler',
-  'FrontPage',
-  'Fuzz',
-  'FyberSpider',
-  'Fyrebot',
-  'G-i-g-a-b-o-t',
-  'GPTBot',
-  'GT::WWW',
-  'GalaxyBot',
-  'Genieo',
-  'GermCrawler',
-  'GetRight',
-  'GetWeb',
-  'Getintent',
-  'Gigabot',
-  'Go!Zilla',
-  'Go-Ahead-Got-It',
-  'GoZilla',
   'Google-Extended',
   'GoogleOther',
-  'Gotit',
-  'GrabNet',
-  'Grabber',
-  'Grafula',
-  'GrapeFX',
-  'GrapeshotCrawler',
-  'GridBot',
-  'HEADMasterSEO',
-  'HMView',
-  'HTMLparser',
-  'HTTP::Lite',
-  'HTTrack',
-  'Haansoft',
-  'HaosouSpider',
-  'Harvest',
-  'Havij',
-  'Heritrix',
-  'Hloader',
-  'HonoluluBot',
-  'Humanlinks',
-  'HybridBot',
-  'IDBTE4M',
-  'IDBot',
-  'IRLbot',
-  'Iblog',
-  'Id-search',
-  'IlseBot',
-  'Image Fetch',
-  'Image Sucker',
+  'GPTBot',
   'ImagesiftBot',
-  'IndeedBot',
-  'Indy Library',
-  'InfoNaviRobot',
-  'InfoTekies',
-  'Intelliseek',
-  'InterGET',
-  'InternetSeer',
-  'Internet Ninja',
-  'Iria',
-  'Iskanie',
-  'IstellaBot',
-  'JOC Web Spider',
-  'JamesBOT',
-  'Jbrofuzz',
-  'JennyBot',
-  'JetCar',
-  'Jetty',
-  'JikeSpider',
-  'Joomla',
-  'Jorgee',
-  'JustView',
-  'Jyxobot',
-  'Kenjin Spider',
-  'Keybot Translation-Search-Machine',
-  'Keyword Density',
-  'Kinza',
-  'Kozmosbot',
-  'LNSpiderguy',
-  'LWP::Simple',
-  'Lanshanbot',
-  'Larbin',
-  'Leap',
-  'LeechFTP',
-  'LeechGet',
-  'LexiBot',
-  'Lftp',
-  'LibWeb',
-  'Libwhisker',
-  'LieBaoFast',
-  'Lightspeedsystems',
-  'Likse',
-  'LinkScan',
-  'LinkWalker',
-  'Linkbot',
-  'LinkextractorPro',
-  'LinkpadBot',
-  'LinksManager',
-  'LinqiaMetadataDownloaderBot',
-  'LinqiaRSSBot',
-  'LinqiaScrapeBot',
-  'Lipperhey',
-  'Lipperhey Spider',
-  'Litemage_walker',
-  'Lmspider',
-  'Ltx71',
-  'MFC_Tear_Sample',
-  'MIDown tool',
-  'MIIxpc',
-  'MJ12bot',
-  'MQQBrowser',
-  'MSFrontPage',
-  'MSIECrawler',
-  'MTRobot',
-  'Mag-Net',
-  'Magnet',
-  'Mail.RU_Bot',
-  'Majestic-SEO',
-  'Majestic12',
-  'Majestic SEO',
-  'MarkMonitor',
-  'MarkWatch',
-  'Mass Downloader',
-  'Masscan',
-  'Mata Hari',
-  'MauiBot',
-  'Mb2345Browser',
-  'MeanPath Bot',
-  'Meanpathbot',
-  'Mediatoolkitbot',
-  'MegaIndex.ru',
+  'magpie-crawler',
   'Meltwater',
-  'Metauri',
-  'MicroMessenger',
-  'Microsoft Data Access',
-  'Microsoft URL Control',
-  'Minefield',
-  'Mister PiX',
-  'Moblie Safari',
-  'Mojeek',
-  'Mojolicious',
-  'MolokaiBot',
-  'Morfeus Fucking Scanner',
-  'Mozlila',
-  'Mr.4x3',
-  'Msrabot',
-  'Musobot',
-  'NICErsPRO',
-  'NPbot',
-  'Name Intelligence',
-  'Nameprotect',
-  'Navroad',
-  'NearSite',
-  'Needle',
-  'Nessus',
-  'NetAnts',
-  'NetLyzer',
-  'NetMechanic',
-  'NetSpider',
-  'NetZIP',
-  'Net Vampire',
-  'Netcraft',
-  'Nettrack',
-  'Netvibes',
-  'NextGenSearchBot',
-  'Nibbler',
-  'Niki-bot',
-  'Nikto',
-  'NimbleCrawler',
-  'Nimbostratus',
-  'Ninja',
-  'Nmap',
-  'Nuclei',
-  'Nutch',
-  'Octopus',
-  'Offline Explorer',
-  'Offline Navigator',
-  'OnCrawl',
-  'OpenLinkProfiler',
-  'OpenVAS',
-  'Openfind',
-  'Openvas',
-  'OrangeBot',
-  'OrangeSpider',
-  'OutclicksBot',
-  'OutfoxBot',
-  'PECL::HTTP',
-  'PHPCrawl',
-  'POE-Component-Client-HTTP',
-  'PageAnalyzer',
-  'PageGrabber',
-  'PageScorer',
-  'PageThing.com',
-  'Page Analyzer',
-  'Pandalytics',
-  'Panscient',
-  'Papa Foto',
-  'Pavuk',
-  'PeoplePal',
+  'omgili',
+  'omgilibot',
   'peer39_crawler',
   'peer39_crawler/1.0',
   'PerplexityBot',
-  'Petalbot',
-  'Pi-Monster',
-  'Picscout',
-  'Picsearch',
-  'PictureFinder',
-  'Piepmatz',
-  'Pimonster',
-  'Pixray',
-  'PleaseCrawl',
-  'Pockey',
-  'ProPowerBot',
-  'ProWebWalker',
-  'Probethenet',
-  'Proximic',
-  'Psbot',
-  'Pu_iN',
-  'Pump',
-  'PxBroker',
-  'PyCurl',
-  'QueryN Metasearch',
-  'Quick-Crawler',
-  'RSSingBot',
-  'Rainbot',
-  'RankActive',
-  'RankActiveLinkBot',
-  'RankFlex',
-  'RankingBot',
-  'RankingBot2',
-  'Rankivabot',
-  'RankurBot',
-  'Re-re',
-  'ReGet',
-  'RealDownload',
-  'Reaper',
-  'RebelMouse',
-  'Recorder',
-  'RedesScrapy',
-  'RepoMonkey',
-  'Ripper',
-  'RocketCrawler',
-  'Rogerbot',
-  'SBIder',
-  'SEOkicks',
-  'SEOkicks-Robot',
-  'SEOlyticsCrawler',
-  'SEOprofiler',
-  'SEOstats',
-  'SISTRIX',
-  'SMTBot',
-  'SalesIntelligent',
-  'ScanAlert',
-  'Scanbot',
-  'ScoutJet',
-  'Scrapy',
-  'Screaming',
-  'ScreenerBot',
-  'ScrepyBot',
-  'Searchestate',
-  'SearchmetricsBot',
-  'Seekport',
-  'SeekportBot',
-  'SemanticJuice',
-  'Semrush',
-  'SemrushBot',
-  'SentiBot',
-  'SenutoBot',
-  'SeoSiteCheckup',
-  'SeobilityBot',
-  'Seomoz',
-  'Shodan',
-  'Siphon',
-  'SiteCheckerBotCrawler',
-  'SiteExplorer',
-  'SiteLockSpider',
-  'SiteSnagger',
-  'SiteSucker',
-  'Site Sucker',
-  'Sitebeam',
-  'Siteimprove',
-  'Sitevigil',
-  'SlySearch',
-  'SmartDownload',
-  'Snake',
-  'Snapbot',
-  'Snoopy',
-  'SocialRankIOBot',
-  'Sociscraper',
-  'Sogou web spider',
-  'Sosospider',
-  'Sottopop',
-  'SpaceBison',
-  'Spammen',
-  'SpankBot',
-  'Spanner',
-  'Spbot',
-  'Spinn3r',
-  'SputnikBot',
-  'Sqlmap',
-  'Sqlworm',
-  'Sqworm',
-  'Steeler',
-  'Stripper',
-  'Sucker',
-  'Sucuri',
-  'SuperBot',
-  'SuperHTTP',
-  'Surfbot',
-  'SurveyBot',
-  'Suzuran',
-  'Swiftbot',
-  'Szukacz',
-  'T0PHackTeam',
-  'T8Abot',
-  'Teleport',
-  'TeleportPro',
-  'Telesoft',
-  'Telesphoreo',
-  'Telesphorep',
-  'TheNomad',
-  'The Intraformant',
-  'Thumbor',
-  'TightTwatBot',
-  'TinyTestBot',
-  'Titan',
-  'Toata',
-  'Toweyabot',
-  'Tracemyfile',
-  'Trendiction',
-  'Trendictionbot',
-  'True_Robot',
-  'Turingos',
-  'Turnitin',
-  'TurnitinBot',
-  'TwengaBot',
-  'Twice',
-  'Typhoeus',
-  'URLy.Warning',
-  'URLy Warning',
-  'UnisterBot',
-  'Upflow',
-  'V-BOT',
-  'VB Project',
-  'VCI',
-  'Vacuum',
-  'Vagabondo',
-  'VelenPublicWebCrawler',
-  'VeriCiteCrawler',
-  'VidibleScraper',
-  'Virusdie',
-  'VoidEYE',
-  'Voil',
-  'Voltron',
-  'WASALive-Bot',
-  'WBSearchBot',
-  'WEBDAV',
-  'WISENutbot',
-  'WPScan',
-  'WWW-Collector-E',
-  'WWW-Mechanize',
-  'WWW::Mechanize',
-  'WWWOFFLE',
-  'Wallpapers',
-  'Wallpapers/3.0',
-  'WallpapersHD',
-  'WeSEE',
-  'WebAuto',
-  'WebBandit',
-  'WebCollage',
-  'WebCopier',
-  'WebEnhancer',
-  'WebFetch',
-  'WebFuck',
-  'WebGo IS',
-  'WebImageCollector',
-  'WebLeacher',
-  'WebPix',
-  'WebReaper',
-  'WebSauger',
-  'WebStripper',
-  'WebSucker',
-  'WebWhacker',
-  'WebZIP',
-  'Web Auto',
-  'Web Collage',
-  'Web Enhancer',
-  'Web Fetch',
-  'Web Fuck',
-  'Web Pix',
-  'Web Sauger',
-  'Web Sucker',
-  'Webalta',
-  'WebmasterWorldForumBot',
-  'Webshag',
-  'WebsiteExtractor',
-  'WebsiteQuester',
-  'Website Quester',
-  'Webster',
-  'Whack',
-  'Whacker',
-  'Whatweb',
-  'Who.is Bot',
-  'Widow',
-  'WinHTTrack',
-  'WiseGuys Robot',
-  'Wonderbot',
-  'Woobot',
-  'Wotbox',
-  'Wprecon',
-  'Xaldon WebSpider',
-  'Xaldon_WebSpider',
-  'Xenu',
-  'YoudaoBot',
-  'Zade',
-  'Zauba',
-  'Zermelo',
-  'Zeus',
-  'Zitebot',
-  'ZmEu',
-  'ZoomBot',
-  'ZoominfoBot',
-  'ZumBot',
-  'ZyBorg',
-  'adscanner',
-  'anthropic-ai',
-  'archive.org_bot',
-  'arquivo-web-crawler',
-  'arquivo.pt',
-  'autoemailspider',
-  'backlink-check',
-  'cah.io.community',
-  'check1.exe',
-  'clark-crawler',
-  'coccocbot',
-  'cognitiveseo',
-  'cohere-ai',
-  'com.plumanalytics',
-  'crawl.sogou.com',
-  'crawler.feedback',
-  'crawler4j',
-  'dataforseo.com',
-  'dataforseobot',
-  'demandbase-bot',
-  'domainsproject.org',
-  'eCatch',
-  'evc-batch',
-  'FacebookBot',
-  'facebookscraper',
-  'gopher',
-  'heritrix',
-  'imagesift.com',
-  'instabid',
-  'internetVista monitor',
-  'ips-agent',
-  'isitwp.com',
-  'iubenda-radar',
-  'linkdexbot',
-  'lwp-request',
-  'lwp-trivial',
-  'magpie-crawler',
-  'meanpathbot',
-  'mediawords',
-  'muhstik-scan',
-  'netEstate NE Crawler',
-  'oBot',
-  'omgili',
-  'omgilibot',
-  'openai',
-  'openai.com',
-  'page scorer',
-  'pcBrowser',
-  'plumanalytics',
-  'polaris version',
-  'probe-image-size',
-  'ripz',
-  'SEMrushBot',
-  's1z.ru',
-  'satoristudio.net',
-  'scalaj-http',
-  'scan.lol',
+  'PiplBot',
   'Seekr',
-  'seobility',
-  'seocompany.store',
-  'seoscanners',
-  'seostar',
-  'serpstatbot',
-  'sexsearcher',
-  'sitechecker.pro',
-  'siteripz',
-  'sogouspider',
-  'sp_auditbot',
-  'spyfu',
-  'sysscan',
-  'tAkeOut',
-  'trendiction.com',
-  'trendiction.de',
-  'ubermetrics-technologies.com',
-  'voyagerx.com',
-  'webgains-bot',
-  'webmeup-crawler',
-  'webpros.com',
-  'webprosbot',
-  'x09Mozilla',
-  'x22Mozilla',
-  'xpymep1.exe',
   'YouBot',
-  'zauba.io',
-  'zgrab',
 ]
\ No newline at end of file