# ============================================================================
# Robots.txt for Pressonify.ai
# AI Discovery Protocol v2.0 Compliant
# Last Updated: 2025-12-08
# ============================================================================

# Sitemaps for Indexing
# Two sitemap locations are advertised as absolute URLs: the main sitemap.xml
# and a separate sitemap-ai.xml.
Sitemap: https://pressonify.ai/sitemap.xml
Sitemap: https://pressonify.ai/sitemap-ai.xml

# ============================================================================
# CRAWLER RULES
#
# Structure notes (Robots Exclusion Protocol, RFC 9309):
# - Allow/Disallow lines only take effect inside a group, i.e. after one or
#   more User-agent lines. Rules placed before the first User-agent line are
#   ignored, and rules placed after the last group belong to that group only.
# - A crawler obeys only the group that matches its name most specifically and
#   falls back to "*" only when no named group matches. The protected paths
#   are therefore repeated in the AI crawler group below; keep both lists in
#   sync when editing.
# - Groups contain no blank lines (bare "#" lines are used as separators)
#   because some older parsers treat a blank line as the end of a group.
# - Disallow rules are listed before the catch-all "Allow: /" so that parsers
#   applying the first matching rule (instead of the longest match) still
#   honor them.
# ============================================================================

# ============================================================================
# STANDARD CRAWLER RULES
# Default group for every crawler that is not named in a later group
# ============================================================================
User-agent: *
#
# ---------------------------------------------------------------------------
# PROTECTED PATHS
# Administrative and internal endpoints
# ---------------------------------------------------------------------------
#
# Admin and dashboard (requires authentication)
Disallow: /admin/
Disallow: /dashboard/
Disallow: /my-releases/
Disallow: /user-dashboard
#
# Internal API endpoints
Disallow: /api/v1/internal/
#
# Authentication flows
Disallow: /login
Disallow: /register
Disallow: /forgot-password
#
# Legacy URL format (redirects to /news/)
Disallow: /pr/
#
# ---------------------------------------------------------------------------
# AI DISCOVERY ENDPOINTS (ADP v2.1)
# These endpoints are specifically designed for LLM and AI crawler ingestion
# ---------------------------------------------------------------------------
#
# Master Discovery Manifest (Industry Standard)
# Format: JSON | Updates: Daily | Contains: all AI endpoint references
Allow: /.well-known/ai.json
#
# Security Contact (RFC 9116)
Allow: /.well-known/security.txt
#
# Meta-Index: Entry point for AI discovery, maps all resources
# Format: JSON | Updates: Hourly | Contains: endpoint catalog, entity counts
Allow: /ai-discovery.json
Allow: /ai-discovery.md
#
# Knowledge Graph: Structured entity catalog using Schema.org vocabulary
# Format: JSON-LD | Updates: Hourly | Contains: Organizations, NewsArticles, Persons
Allow: /knowledge-graph.json
#
# LLM Context Documents: Human-readable markdown for AI assistants
# Format: Markdown/Text | Updates: Daily | Contains: platform overview, latest PRs
Allow: /llms.txt
Allow: /llms-full.txt
#
# RSS/Atom/JSON Feeds: Syndication feeds for content updates
# Format: XML/JSON | Updates: Real-time on publish
Allow: /feed
Allow: /rss
Allow: /feed.json
#
# Delta Feed: Incremental updates for AI crawlers
# Format: JSON | Updates: Real-time | Contains: changes since timestamp
Allow: /updates.json
#
# ---------------------------------------------------------------------------
# PUBLIC CONTENT - EXPLICITLY ALLOWED
# ---------------------------------------------------------------------------
#
# Press release archive and individual articles
# (rules are prefix matches, so this covers every URL under /news/)
Allow: /news/
#
# Public pages
Allow: /pricing
Allow: /about
Allow: /how-it-works
Allow: /blog/
Allow: /contact
Allow: /free-tools
Allow: /ai-visibility-checker
Allow: /free-headline-generator
Allow: /glossary
Allow: /changelog
#
# API documentation
Allow: /api/docs
#
# Static assets
Allow: /static/
#
# Everything not disallowed above may be crawled
Allow: /
# Non-standard directive; honored by some crawlers and ignored by others
Crawl-delay: 1

# ============================================================================
# AI-SPECIFIC CRAWLER PERMISSIONS
# Full access to public content granted to major AI platforms for content
# indexing. One shared group: the rules below apply to every User-agent listed.
# No Crawl-delay is set for these crawlers.
# ============================================================================
#
# OpenAI (ChatGPT, GPT-4)
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
# Anthropic (Claude)
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: anthropic-ai
# Perplexity
User-agent: PerplexityBot
# Google AI (Gemini, Bard)
User-agent: Google-Extended
# Apple (Siri, Apple Intelligence)
User-agent: Applebot-Extended
# Microsoft (Copilot)
User-agent: Bingbot
# You.com
User-agent: YouBot
# Cohere
User-agent: cohere-ai
# Meta AI
User-agent: Meta-ExternalAgent
#
# Protected paths (same list as in the "*" group; a named crawler does not
# inherit rules from "*")
# Admin and dashboard (requires authentication)
Disallow: /admin/
Disallow: /dashboard/
Disallow: /my-releases/
Disallow: /user-dashboard
# Internal API endpoints
Disallow: /api/v1/internal/
# Authentication flows
Disallow: /login
Disallow: /register
Disallow: /forgot-password
# Legacy URL format (redirects to /news/)
Disallow: /pr/
#
# Everything else, including the AI discovery endpoints and public content
# listed in the "*" group, may be crawled
Allow: /

# ============================================================================
# NOTES FOR AI CRAWLERS
# ============================================================================
#
# For structured data ingestion, use these endpoints in order:
# 1. /ai-discovery.json - Get the meta-index first
# 2. /knowledge-graph.json - Full entity graph with relationships
# 3. /llms.txt - Human-readable context and latest content
#
# All endpoints support:
# - CORS (Access-Control-Allow-Origin: *)
# - HTTP caching (ETag, Last-Modified)
# - Content versioning
#
# Contact: support@pressonify.ai
# Documentation: https://pressonify.ai/blog/seo-to-aeo-geo-llmo-adp-evolution
# ============================================================================