feat: support ua-synonyms in robots.json to reduce duplication (#144)

This commit is contained in:
brndnprog 2025-06-11 22:04:39 -07:00
parent cf598b6b71
commit f0606f0eac
2 changed files with 93 additions and 163 deletions

View file

@@ -1,10 +1,17 @@
#!/usr/bin/env python3
"""To run these tests just execute this script."""
import json
import re
import unittest
from robots import json_to_txt, json_to_table, json_to_htaccess, json_to_nginx, json_to_haproxy, json_to_caddy
from robots import (
json_to_txt,
json_to_table,
json_to_htaccess,
json_to_nginx,
json_to_haproxy,
json_to_caddy,
)
class RobotsUnittestExtensions:
def loadJson(self, pathname):
@@ -14,16 +21,13 @@ class RobotsUnittestExtensions:
def assertEqualsFile(self, f, s):
    """Assert that the text file at path ``f`` contains exactly ``s``.

    Args:
        f: Path of the file holding the expected text.
        s: String to compare against the file's contents.

    Returns:
        Whatever ``self.assertMultiLineEqual`` returns.

    Raises:
        Whatever ``self.assertMultiLineEqual`` raises on a mismatch.
    """
    # Use a distinct name for the handle instead of rebinding the path
    # parameter ``f``, and pin the encoding so the comparison does not
    # depend on the platform's locale default.
    # NOTE(review): assumes the fixture files are stored as UTF-8 - confirm.
    with open(f, "rt", encoding="utf-8") as expected_file:
        f_contents = expected_file.read()
    return self.assertMultiLineEqual(f_contents, s)
class TestRobotsTXTGeneration(unittest.TestCase, RobotsUnittestExtensions):
    """Compare generated robots.txt text with the stored fixture file."""

    # Raise the cap on the diff length unittest prints when the texts differ.
    maxDiff = 8192

    def setUp(self):
        """Load the robots.json fixture used as generator input."""
        self.robots_dict = self.loadJson("test_files/robots.json")

    def test_robots_txt_generation(self):
        # The json_to_txt output must equal the fixture file's contents.
        expected_path = "test_files/robots.txt"
        generated = json_to_txt(self.robots_dict)
        self.assertEqualsFile(expected_path, generated)
@@ -31,10 +35,8 @@ class TestRobotsTXTGeneration(unittest.TestCase, RobotsUnittestExtensions):
class TestTableMetricsGeneration(unittest.TestCase, RobotsUnittestExtensions):
    """Compare the generated bot-metrics table with the stored fixture file."""

    # Raise the cap on the diff length unittest prints when the texts differ.
    maxDiff = 32768

    def setUp(self):
        """Load the robots.json fixture used as generator input."""
        self.robots_dict = self.loadJson("test_files/robots.json")

    def test_table_generation(self):
        # The json_to_table output must equal the fixture file's contents.
        expected_path = "test_files/table-of-bot-metrics.md"
        generated = json_to_table(self.robots_dict)
        self.assertEqualsFile(expected_path, generated)
@@ -42,53 +44,71 @@ class TestTableMetricsGeneration(unittest.TestCase, RobotsUnittestExtensions):
class TestHtaccessGeneration(unittest.TestCase, RobotsUnittestExtensions):
    """Compare generated .htaccess text with the stored fixture file."""

    # Raise the cap on the diff length unittest prints when the texts differ.
    maxDiff = 8192

    def setUp(self):
        """Load the robots.json fixture used as generator input."""
        self.robots_dict = self.loadJson("test_files/robots.json")

    def test_htaccess_generation(self):
        # The json_to_htaccess output must equal the fixture file's contents.
        expected_path = "test_files/.htaccess"
        generated = json_to_htaccess(self.robots_dict)
        self.assertEqualsFile(expected_path, generated)
class TestNginxConfigGeneration(unittest.TestCase, RobotsUnittestExtensions):
    """Compare the generated nginx config with the stored fixture file."""

    # Raise the cap on the diff length unittest prints when the texts differ.
    maxDiff = 8192

    def setUp(self):
        """Load the robots.json fixture used as generator input."""
        self.robots_dict = self.loadJson("test_files/robots.json")

    def test_nginx_generation(self):
        # The json_to_nginx output must equal the fixture file's contents.
        expected_path = "test_files/nginx-block-ai-bots.conf"
        generated = json_to_nginx(self.robots_dict)
        self.assertEqualsFile(expected_path, generated)
class TestHaproxyConfigGeneration(unittest.TestCase, RobotsUnittestExtensions):
    """Compare the generated HAProxy list with the stored fixture file."""

    # Raise the cap on the diff length unittest prints when the texts differ.
    maxDiff = 8192

    def setUp(self):
        """Load the robots.json fixture used as generator input."""
        self.robots_dict = self.loadJson("test_files/robots.json")

    def test_haproxy_generation(self):
        # The json_to_haproxy output must equal the fixture file's contents.
        expected_path = "test_files/haproxy-block-ai-bots.txt"
        generated = json_to_haproxy(self.robots_dict)
        self.assertEqualsFile(expected_path, generated)
class TestRobotsNameCleaning(unittest.TestCase):
    """Exercise ``robots.clean_robot_name`` on a known input."""

    def test_clean_name(self):
        # The helper is imported here, inside the test, rather than at the
        # top of the module.
        from robots import clean_robot_name

        cleaned = clean_robot_name("PerplexityUser")
        self.assertEqual(cleaned, "Perplexity-User")
class TestCaddyfileGeneration(unittest.TestCase, RobotsUnittestExtensions):
    """Compare the generated Caddyfile with the stored fixture file."""

    # Raise the cap on the diff length unittest prints when the texts differ.
    maxDiff = 8192

    def setUp(self):
        """Load the robots.json fixture used as generator input."""
        self.robots_dict = self.loadJson("test_files/robots.json")

    def test_caddyfile_generation(self):
        # The json_to_caddy output must equal the fixture file's contents.
        expected_path = "test_files/Caddyfile"
        generated = json_to_caddy(self.robots_dict)
        self.assertEqualsFile(expected_path, generated)
class TestRobotsNameCleaning(unittest.TestCase):
    """Check the output of ``robots.clean_robot_name`` for one sample name."""

    def test_clean_name(self):
        # Function-scope import: the helper is only needed by this test.
        from robots import clean_robot_name

        expected = "Perplexity-User"
        actual = clean_robot_name("PerplexityUser")
        self.assertEqual(actual, expected)
class TestUASynonymsSupport(unittest.TestCase):
    """Check that "ua-synonyms" entries are emitted next to the main bot name."""

    # The primary key of the fixture below plus each of its listed synonyms.
    # Kept in one place so both tests check the same set of names.
    EXPECTED_USER_AGENTS = ("MainBot", "mainbot/1.0", "Main-Bot")

    def setUp(self):
        """Build a one-bot dataset whose entry declares two synonyms."""
        self.test_data = {
            "MainBot": {
                "ua-synonyms": ["mainbot/1.0", "Main-Bot"],
                "operator": "TestCorp",
                "respect": "No",
                "function": "AI Bot",
                "frequency": "Daily",
                "description": "Used for testing ua-synonyms.",
            }
        }

    def test_robots_txt_includes_synonyms(self):
        # Every name must appear in the output after a "User-agent: " prefix.
        output = json_to_txt(self.test_data)
        for variant in self.EXPECTED_USER_AGENTS:
            # subTest reports each missing variant separately instead of
            # stopping at the first failed assertion.
            with self.subTest(variant=variant):
                self.assertIn(f"User-agent: {variant}", output)

    def test_htaccess_includes_synonyms(self):
        # Names are searched for in their re.escape()d form.
        # NOTE(review): presumably because the generated rule is a regex
        # built with re.escape - confirm against json_to_htaccess.
        output = json_to_htaccess(self.test_data)
        for variant in self.EXPECTED_USER_AGENTS:
            with self.subTest(variant=variant):
                self.assertIn(re.escape(variant), output)
if __name__ == "__main__":
    import os

    # The fixture paths used by the tests ("test_files/...") are relative,
    # so run from this script's own directory. abspath() guards against
    # __file__ being a bare relative name (script launched from its own
    # directory on Python < 3.9): dirname() would then be "" and
    # os.chdir("") raises FileNotFoundError.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    unittest.main(verbosity=2)