# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a) If a Content-Signal = yes, you may collect content for the corresponding
# use.
# (b) If a Content-Signal = no, you may not collect content for the
# corresponding use.
# (c) If the website operator does not include a Content-Signal for a
# corresponding use, the website operator neither grants nor restricts
# permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search: building a search index and providing search results (e.g., returning
# hyperlinks and short excerpts from your website's contents). Search does not
# include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
# augmented generation, grounding, or other real-time taking of content for
# generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudflareBrowserRenderingCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# Really Happy Trips - SEO Optimized Robots.txt
# Last Updated: 2026-03-10
# Purpose: Maximum visibility for all search engines and bots

# ==============================
# MAIN CRAWLER ACCESS - OPEN TO ALL
# ==============================

# Allow all crawlers full access to the website
User-agent: *
Allow: /

# Sitemap location for better crawling
Sitemap: https://reallyhappytripsladydriver.com/sitemap.xml

# ==============================
# SEO BEST PRACTICES - BLOCK UNNECESSARY CONTENT
# ==============================

# Block dynamic URL parameters that create duplicate content
Disallow: /*?*
Disallow: /*&*
Disallow: /search?
Disallow: /filter?
Disallow: /sort?
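# The wildcard rules above use Google's robots.txt extensions: "*" matches any
# sequence of characters and "$" anchors the end of the URL, while plain
# entries such as "/search?" match by prefix. Below is a minimal sketch of
# that matching logic, commented out so this file stays valid robots.txt
# (robots_pattern_to_regex is an illustrative helper name, not from any
# library):
#
#   import re
#
#   def robots_pattern_to_regex(pattern):
#       regex = re.escape(pattern)          # treat ".", "?", etc. literally
#       regex = regex.replace(r"\*", ".*")  # "*" = any run of characters
#       regex = regex.replace(r"\$", "$")   # "$" = end of the URL
#       return re.compile(regex)            # match() anchors at the URL start
#
#   assert robots_pattern_to_regex("/*?*").match("/tours?page=2")
#   assert robots_pattern_to_regex("/*.pdf$").match("/brochure.pdf")
#   assert not robots_pattern_to_regex("/*.pdf$").match("/brochure.pdf.html")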
# Block common development areas (generic patterns)
Disallow: /dev/
Disallow: /staging/
Disallow: /test/
Disallow: /backup/
Disallow: /old/
Disallow: /temp/
Disallow: /tmp/

# Block document files that don't need indexing
Disallow: /*.pdf$
Disallow: /*.doc$
Disallow: /*.xls$
Disallow: /*.docx$
Disallow: /*.xlsx$

# ==============================
# ALLOW IMPORTANT FILE TYPES
# ==============================

# Allow all media and content files
Allow: /*.css$
Allow: /*.js$
Allow: /*.jpg$
Allow: /*.jpeg$
Allow: /*.png$
Allow: /*.gif$
Allow: /*.svg$
Allow: /*.webp$
Allow: /*.ico$
Allow: /*.json$
Allow: /*.xml$
Allow: /*.txt$
Allow: /*.pdf$  # Allow PDFs if they contain useful content

# ==============================
# MAJOR SEARCH ENGINE OPTIMIZATION
# ==============================

# Google - full access (note: Googlebot ignores the Crawl-delay directive)
User-agent: Googlebot
Allow: /
Crawl-delay: 0.2

# Bing - Microsoft's search engine
User-agent: Bingbot
Allow: /
Crawl-delay: 0.3

# Yahoo/Slurp - Allow full access
User-agent: Slurp
Allow: /
Crawl-delay: 0.5

# DuckDuckGo - Privacy-focused search
User-agent: DuckDuckBot
Allow: /
Crawl-delay: 0.5

# Baidu - Chinese search engine
User-agent: Baiduspider
Allow: /
Crawl-delay: 1

# Yandex - Russian search engine
User-agent: Yandex
Allow: /
Crawl-delay: 1

# ==============================
# SOCIAL MEDIA CRAWLERS - ALLOW ALL
# ==============================

# Facebook/Open Graph
User-agent: facebookexternalhit
Allow: /

# Twitter/X Cards
User-agent: Twitterbot
Allow: /

# LinkedIn
User-agent: LinkedInBot
Allow: /

# WhatsApp
User-agent: WhatsApp
Allow: /

# Pinterest
User-agent: Pinterestbot
Allow: /

# Instagram/Facebook
User-agent: Instagrambot
Allow: /

# Telegram
User-agent: TelegramBot
Allow: /

# ==============================
# SEO TOOLS - ALLOW MAJOR PLATFORMS
# ==============================

# Google PageSpeed Insights
User-agent: Google-PageSpeed-Insights
Allow: /

# Google Mobile-Friendly Test
User-agent: Google-Mobile-Friendly
Allow: /

# SEMrush - SEO tool (reduced crawl rate)
User-agent: SemrushBot
Allow: /
Crawl-delay: 2

# Ahrefs - SEO tool (reduced crawl rate)
User-agent: AhrefsBot
Allow: /
Crawl-delay: 2

# Moz - SEO tool
User-agent: rogerbot
Allow: /
Crawl-delay: 2

# ==============================
# AGGRESSIVE BOTS - ALLOW BUT CONTROL
# ==============================

# MJ12bot - SEO crawler
User-agent: MJ12bot
Allow: /
Crawl-delay: 3

# Dotbot - SEO crawler
User-agent: dotbot
Allow: /
Crawl-delay: 3

# ==============================
# ARCHIVE SERVICES - ALLOW WEB HISTORY
# ==============================

# Internet Archive (legacy ia_archiver token)
User-agent: ia_archiver
Allow: /
Crawl-delay: 2

# Internet Archive crawler (archive.org_bot)
User-agent: archive.org_bot
Allow: /
Crawl-delay: 2

# ==============================
# AI/ML CRAWLERS - ALLOW FOR TRAINING
# ==============================

# OpenAI GPTBot
User-agent: GPTBot
Allow: /
Crawl-delay: 2

# ChatGPT-User - fetches pages in response to ChatGPT user actions
User-agent: ChatGPT-User
Allow: /
Crawl-delay: 2

# Common Crawl
User-agent: CCBot
Allow: /
Crawl-delay: 2

# Google-Extended - AI training control token (not a separate crawler, so
# Crawl-delay has no effect here)
User-agent: Google-Extended
Allow: /
Crawl-delay: 1

# ==============================
# SPECIALIZED CRAWLERS - ALLOW ALL
# ==============================

# Applebot (Siri, Spotlight)
User-agent: Applebot
Allow: /
Crawl-delay: 1

# Amazonbot (Alexa)
User-agent: Amazonbot
Allow: /
Crawl-delay: 1

# ==============================
# FINAL CATCH-ALL - MAXIMUM VISIBILITY
# ==============================

# Allow any other crawler not specifically mentioned
User-agent: *
Allow: /
Crawl-delay: 1
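# A crawler obeys only one set of groups in this file: under Google's
# robots.txt spec, all groups sharing the User-agent token that most
# specifically matches the bot are merged, and the "*" groups apply only when
# no named token matches. A simplified sketch of that selection, commented out
# to keep this file valid robots.txt (select_group is an illustrative name,
# not a library function, and real parsers match tokens against the crawler's
# product name):
#
#   def select_group(bot_name, groups):
#       """Return the rules a bot follows: longest matching token, else '*'."""
#       bot = bot_name.lower()
#       best = None
#       for token in groups:
#           t = token.lower()
#           if t != "*" and t in bot and (best is None or len(t) > len(best)):
#               best = t
#       return groups[best] if best else groups.get("*")
#
#   groups = {"*": ["Allow: /"], "gptbot": ["Disallow: /", "Allow: /"]}
#   select_group("GPTBot/1.2 (+https://openai.com/gptbot)", groups)
#   # -> the merged GPTBot rules, never the "*" catch-all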
# ==============================
# ADDITIONAL SEO RECOMMENDATIONS
# ==============================

# This robots.txt is designed for maximum visibility:
# 1. Allows all legitimate search engines and bots
# 2. Only blocks duplicate-content URL parameters, development areas, and
#    office-document files
# 3. Provides crawl delays for aggressive bots to prevent server overload
# 4. Includes all major search engines globally
# 5. Allows social media crawlers for better sharing
# 6. Permits SEO tools for analysis
# 7. Enables AI/ML crawlers for potential inclusion in training data
# 8. Supports archive services for web history

# Note: the Cloudflare managed section at the top of this file disallows
# several of the AI and specialized crawlers (GPTBot, CCBot, Google-Extended,
# Amazonbot, and others) that are re-allowed later in the file. Parsers
# resolve such duplicate user-agent groups differently, so the two sections
# should be reconciled before relying on points 1 and 7 above.

# Submit this robots.txt to Google Search Console for validation
# Monitor crawl stats to ensure optimal performance
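# Beyond Search Console, individual rules can be spot-checked locally. A
# minimal sketch using Python's standard urllib.robotparser, commented out to
# keep this file valid robots.txt (caveat: the stdlib parser implements the
# original 1994 spec, so it ignores the "*"/"$" wildcard rules, fractional
# Crawl-delay values, and Content-Signal lines above; use a Google-compatible
# tester for those):
#
#   from urllib.robotparser import RobotFileParser
#
#   rp = RobotFileParser()
#   rp.set_url("https://reallyhappytripsladydriver.com/robots.txt")
#   rp.read()  # fetch and parse the live file
#
#   print(rp.can_fetch("Googlebot", "https://reallyhappytripsladydriver.com/tours/"))
#   print(rp.crawl_delay("SemrushBot"))  # 2, from the SemrushBot group
#   # can_fetch("GPTBot", ...) depends on how the parser resolves the
#   # duplicate GPTBot groups in this file (urllib obeys the first match)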