# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a Content-Signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a Content-Signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a Content-Signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

# NOTE(review): these groups sit between the "BEGIN/END Cloudflare Managed"
# markers and are presumably regenerated by Cloudflare - confirm before
# hand-editing anything in this section.
#
# Wildcard group. The content signals declare that search use is permitted
# and AI training is not. No ai-input signal is given, so that use is neither
# granted nor restricted by this line.
User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

# Each crawler named below is denied the entire site.
User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudflareBrowserRenderingCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# robots.txt for voelkslieder.com
# Last updated: 2026-05-07
#
# Policy:
#   Public, human-facing search engines are welcome.
#   AI training and dataset-collection crawlers are not.
#   The signal is for listeners, not for extractive systems.

# ---------------------------------------------------------------------------
# Allowed: human-facing search engines
# ---------------------------------------------------------------------------

# All human-facing search engines share one group.
#
# A crawler that finds a group naming it obeys only that group and ignores
# the "User-agent: *" group (RFC 9309, section 2.2.1). The download and API
# exclusions therefore have to be repeated here; with a bare "Allow: /" these
# crawlers would be free to fetch those endpoints.
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Bingbot
User-agent: DuckDuckBot
User-agent: Slurp
User-agent: Applebot
User-agent: YandexBot
User-agent: Baiduspider
User-agent: Qwantify
User-agent: Mojeekbot
Allow: /
# Keep these exclusions in sync with the "User-agent: *" rules in this file.
# The longest matching path wins, so they take precedence over "Allow: /".
Disallow: /songs/*/stems.zip
Disallow: /songs/*/*.wav
Disallow: /songs/*/*.mid
Disallow: /api/

# ---------------------------------------------------------------------------
# Denied: AI crawlers and fetchers, dataset collectors, and SEO-extraction bots
# ---------------------------------------------------------------------------

# Each vendor's crawlers are listed together and share a single site-wide
# Disallow rule.

# OpenAI
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
Disallow: /

# Google AI training (separate from search)
User-agent: Google-Extended
Disallow: /

# Anthropic
User-agent: anthropic-ai
User-agent: Claude-Web
User-agent: ClaudeBot
Disallow: /

# Cohere
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
Disallow: /

# Perplexity
User-agent: PerplexityBot
User-agent: Perplexity-User
Disallow: /

# Common Crawl (feeds many AI datasets)
User-agent: CCBot
Disallow: /

# Meta
# User-agent tokens are matched case-insensitively (RFC 9309, section 2.2.1),
# so the single "meta-externalagent" entry also covers the capitalised
# spelling "Meta-ExternalAgent"; a second group for it would be redundant.
User-agent: FacebookBot
User-agent: meta-externalagent
User-agent: Meta-ExternalFetcher
Disallow: /

# Crawlers in the same category are listed together and share a single
# site-wide Disallow rule.

# ByteDance / TikTok
User-agent: Bytespider
Disallow: /

# Amazon
User-agent: Amazonbot
Disallow: /

# AI2
User-agent: AI2Bot
Disallow: /

# Diffbot
User-agent: Diffbot
Disallow: /

# Image-collecting AI scrapers
User-agent: ImagesiftBot
User-agent: img2dataset
Disallow: /

# Aggregators
User-agent: omgili
User-agent: omgilibot
User-agent: magpie-crawler
User-agent: Timpibot
User-agent: YouBot
Disallow: /

# SEO-extraction crawlers (not AI training, but extractive)
User-agent: SemrushBot
User-agent: AhrefsBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: PetalBot
Disallow: /

# ---------------------------------------------------------------------------
# Default rules for everyone else
# ---------------------------------------------------------------------------

User-agent: *
# Keep crawlers away from the API and the download endpoints; the Cloudflare
# Worker enforces the daily window for those downloads.
# Crawlers that match a named group elsewhere in this file follow that group
# and do not inherit these rules.
Disallow: /api/
Disallow: /songs/*/*.mid
Disallow: /songs/*/*.wav
Disallow: /songs/*/stems.zip

# Sitemap location (independent of any user-agent group)
Sitemap: https://voelkslieder.com/sitemap.xml