{
  "metric": "Records-per-Second (RPS) -- Sitemap Throughput",
  "page": "/methodology/sitemap-throughput/2026-04-27",
  "frozen_date": "2026-04-27",
  "publisher": "GEOlocus.ai (GeoLocus Group, a subsidiary of Aryah.ai)",
  "formula": "RPS = total_terminal_URLs / TTLB_full_tree_traversal_seconds",
  "anonymization_note": "Cohort sites are anonymized as Site A through Site D in the rendered methodology page. This receipts.json names them. Top10Lists.us is named on the page as the flagship + proof-of-concept.",
  "method": {
    "ua_set": ["Googlebot/2.1", "ClaudeBot/1.0"],
    "phase_1": "10 rapid-fire hits to root sitemap.xml per host, pinned CF/origin edge IP, Accept-Encoding compressed",
    "phase_2": "Single parallel fetch with concurrency 10, un-pinned DNS, walks root + every child sitemap one level deep, counts <url> entries; wall-clock from perf_counter() request-start to as_completed() last-future-resolved",
    "bench_script_source": "C:/Users/ROBER/Downloads/sitemap-benchmark-2026-04-26/run.py (also embedded verbatim in /methodology/sitemap-benchmark/2026-04-26 page)"
  },
  "cohort": [
    {
      "label": "Top10Lists.us",
      "domain": "www.top10lists.us",
      "role": "flagship + proof-of-concept",
      "outcome": "served",
      "terminal_urls": 230329,
      "child_sitemap_count": 29,
      "ttfb_p50_ms": 86,
      "ttlb_full_tree_seconds": 1.640,
      "rps": 140445,
      "ratio_vs_top10": 1.0,
      "sitemap_structure": "root sitemap.xml -> sitemap index with 29 child shards (state, city, neighborhood, agent indexes); avg ~7,944 URLs per shard",
      "notes": "230K terminal URLs is itself a competitive moat; most cohort sites carry well under 10K total"
    },
    {
      "label": "Site A",
      "domain": "moz.com",
      "outcome": "served",
      "terminal_urls": 7953,
      "child_sitemap_count": 19,
      "ttfb_p50_ms": 399,
      "ttlb_full_tree_seconds": 0.818,
      "rps": 9727,
      "ratio_vs_top10": 14.4
    },
    {
      "label": "Site B",
      "domain": "seo.com",
      "outcome": "served (with 1 redirect on root)",
      "terminal_urls": 8755,
      "child_sitemap_count": 22,
      "ttfb_p50_ms": 399,
      "ttlb_full_tree_seconds": 0.901,
      "rps": 9716,
      "ratio_vs_top10": 14.5
    },
    {
      "label": "Site C",
      "domain": "brafton.com",
      "outcome": "blocked (browser UA only; bots get HTTP 403 at root)",
      "outcome_evidence": "Sitemap reachable to browser UA; HTTP 403 returned to Googlebot/2.1 + ClaudeBot/1.0; AI-bot-observed RPS is N/A",
      "terminal_urls_browser_ua": 23971,
      "child_sitemap_count": 19,
      "ttfb_p50_ms_browser_ua": 108,
      "ttlb_full_tree_seconds_browser_ua_throttled": 30.0,
      "rps_browser_ua_throttled_estimated": 799,
      "throttle_evidence": "WAF rate-limit kicked in mid-traversal at concurrency 10",
      "ratio_vs_top10": 175.0
    },
    {
      "label": "Site D",
      "domain": "victorious.com",
      "outcome": "served",
      "terminal_urls": 642,
      "child_sitemap_count": 5,
      "ttfb_p50_ms": 739,
      "ttlb_full_tree_seconds": 1.099,
      "rps": 584,
      "ratio_vs_top10": 240.0
    }
  ],
  "bands": {
    "bulk_throughput": {"rps_min": 50000, "reading": "Crawler indexes 100K+ URLs in 2-3 seconds; massive sites stay fully discoverable"},
    "healthy": {"rps_min": 5000, "rps_max": 50000, "reading": "Mid-size sites (5K-50K terminal URLs) stay fully discoverable in standard crawl budgets"},
    "mid": {"rps_min": 1000, "rps_max": 5000, "reading": "Small-site discovery acceptable; large-site crawls truncate"},
    "constrained": {"rps_max": 1000, "reading": "Crawler covers a small fraction of the site per pass; structural index is partial"}
  },
  "limitations": [
    "Single residential measurement (Phoenix AZ); datacenter measurement would shift absolutes; order-of-magnitude differential is robust",
    "One-level recursion only; deeper-nested sitemap indexes would have part of tree out-of-scope",
    "Concurrency 10; real-world AI crawlers may run higher concurrency; raising concurrency would amplify the differential against WAF-throttled sites",
    "RPS doesn't measure record quality; paired with SGR, RR, and structured-data-coverage for the full picture"
  ]
}
