# https://www.robotstxt.org/robotstxt.html
# Default rules for every crawler that has no more specific group below.
# A crawler that matches one of the named groups further down follows that
# group INSTEAD of this one; nothing here is inherited by those groups.
User-agent: *
# API routes and framework/hosting internals
Disallow: /api/
Disallow: /_next/
Disallow: /_vercel/
# Image and media directories
Disallow: /logos/
Disallow: /certifications/
Disallow: /references/
Disallow: /images/
Disallow: /videos/
# Image and media files anywhere on the site, matched by extension
# ("*" matches any run of characters, "$" anchors the end of the URL)
Disallow: /*.jpg$
Disallow: /*.jpeg$
Disallow: /*.png$
Disallow: /*.gif$
Disallow: /*.webp$
Disallow: /*.svg$
Disallow: /*.ico$
Disallow: /*.mp4$
Disallow: /*.webm$
# Authentication and account pages (not useful in search results).
# NOTE(review): these patterns need one leading path segment AND a trailing
# slash (e.g. /en/sign-in/). URLs such as /en/sign-in or /sign-in/ are not
# matched - confirm this is the URL shape the site actually serves.
Disallow: /*/sign-in/
Disallow: /*/sign-up/
Disallow: /*/dashboard/
# Any URL that carries a query string (avoids duplicate content)
Disallow: /*?*
# Everything else may be crawled. This line is deliberately placed AFTER the
# Disallow rules: parsers that apply the first matching rule in file order
# would otherwise stop here and never reach the Disallow lines. Parsers that
# use longest-match precedence are unaffected by the position.
Allow: /
# Ask for one second between requests (non-standard; only some bots honor it)
Crawl-delay: 1
# Google web search crawler. Googlebot does not support Crawl-delay, so none
# is set. This group is used instead of the "*" group, so only the rules
# listed here apply to Googlebot (the "*" group's file-extension, /_vercel/
# and query-string rules do not).
User-agent: Googlebot
# Keep build assets fetchable so pages can be rendered: blocking all of
# /_next/ also blocks the JS/CSS bundles. The longer path wins over
# "Disallow: /_next/" under longest-match precedence, and it is listed first
# for parsers that evaluate rules in file order.
# NOTE(review): assumes the standard Next.js layout where compiled JS/CSS is
# served from /_next/static/ - confirm for this deployment.
Allow: /_next/static/
# API routes and remaining framework internals
Disallow: /api/
Disallow: /_next/
# Image and media directories
Disallow: /logos/
Disallow: /certifications/
Disallow: /references/
Disallow: /images/
Disallow: /videos/
# Authentication and account pages
Disallow: /*/sign-in/
Disallow: /*/sign-up/
Disallow: /*/dashboard/
# Everything else is crawlable. Placed last so that order-sensitive parsers
# do not match it before the Disallow rules above.
Allow: /
# Image and preview crawlers: blocked from the whole site.
#   Googlebot-Image - Google Images crawler
#   BingPreview     - Bing page-preview (snapshot) fetcher
#   msnbot-media    - Bing image/video crawler
#   YandexImages    - Yandex Images crawler
# Several User-agent lines in a row share the rules that follow them.
User-agent: Googlebot-Image
User-agent: BingPreview
User-agent: msnbot-media
User-agent: YandexImages
Disallow: /
# Bing web search crawler. This group is used instead of the "*" group, so
# only the rules listed here apply to Bingbot.
User-agent: Bingbot
# API routes and framework internals
Disallow: /api/
Disallow: /_next/
# Authentication and account pages
Disallow: /*/sign-in/
Disallow: /*/sign-up/
Disallow: /*/dashboard/
# Everything else is crawlable. Placed after the Disallow rules so that
# parsers applying the first matching rule in file order still honor them;
# longest-match parsers are unaffected by the position.
Allow: /
# Ask for one second between requests
Crawl-delay: 1
# DuckDuckGo crawler. This group is used instead of the "*" group, so only
# the rules listed here apply to DuckDuckBot. No Crawl-delay is requested.
User-agent: DuckDuckBot
# API routes and framework internals
Disallow: /api/
Disallow: /_next/
# Authentication and account pages
Disallow: /*/sign-in/
Disallow: /*/sign-up/
Disallow: /*/dashboard/
# Everything else is crawlable. Placed after the Disallow rules so that
# parsers applying the first matching rule in file order still honor them;
# longest-match parsers are unaffected by the position.
Allow: /
# Yahoo crawler (Slurp). This group is used instead of the "*" group, so only
# the rules listed here apply to Slurp.
User-agent: Slurp
# API routes and framework internals
Disallow: /api/
Disallow: /_next/
# Authentication and account pages
Disallow: /*/sign-in/
Disallow: /*/sign-up/
Disallow: /*/dashboard/
# Everything else is crawlable. Placed after the Disallow rules so that
# parsers applying the first matching rule in file order still honor them;
# longest-match parsers are unaffected by the position.
Allow: /
# Ask for one second between requests
Crawl-delay: 1
# Yandex crawler. This group is used instead of the "*" group, so only the
# rules listed here apply to it.
User-agent: Yandex
# API routes and framework internals
Disallow: /api/
Disallow: /_next/
# Authentication and account pages
Disallow: /*/sign-in/
Disallow: /*/sign-up/
Disallow: /*/dashboard/
# Everything else is crawlable. Placed after the Disallow rules so that
# parsers applying the first matching rule in file order still honor them;
# longest-match parsers are unaffected by the position.
Allow: /
# Ask for one second between requests
Crawl-delay: 1
# Baidu crawler. This group is used instead of the "*" group, so only the
# rules listed here apply to Baiduspider.
User-agent: Baiduspider
# API routes and framework internals
Disallow: /api/
Disallow: /_next/
# Authentication and account pages
Disallow: /*/sign-in/
Disallow: /*/sign-up/
Disallow: /*/dashboard/
# Everything else is crawlable. Placed after the Disallow rules so that
# parsers applying the first matching rule in file order still honor them;
# longest-match parsers are unaffected by the position.
Allow: /
# Ask for two seconds between requests (slower than the other search bots)
Crawl-delay: 2
# Social link-preview crawlers (Facebook, Twitter, LinkedIn): full access,
# with none of the restrictions from the "*" group.
# Several User-agent lines in a row share the rules that follow them.
User-agent: facebookexternalhit
User-agent: Twitterbot
User-agent: LinkedInBot
Allow: /
# AI training / AI data-collection crawlers: blocked from the whole site.
# Several User-agent lines in a row share the single rule that follows them.
# ClaudeBot is listed alongside the older anthropic-ai / Claude-Web tokens
# because it is the token Anthropic documents for its crawler; without it
# that crawler would fall through to the permissive "*" group.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: Google-Extended
User-agent: CCBot
User-agent: anthropic-ai
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: Bytespider
User-agent: cohere-ai
User-agent: Diffbot
User-agent: ImagesiftBot
User-agent: Omgilibot
User-agent: Omgili
User-agent: FacebookBot
User-agent: PerplexityBot
Disallow: /
# SEO / backlink-analysis and other high-volume crawlers: blocked from the
# whole site. Several User-agent lines in a row share the rule below.
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: BLEXBot
User-agent: PetalBot
User-agent: MegaIndex
User-agent: SEOkicks
User-agent: serpstatbot
User-agent: DataForSeoBot
Disallow: /
# Generic scraping tools and HTTP clients: blocked from the whole site.
# robots.txt is advisory, so this only affects clients that choose to read
# and honor it. Several User-agent lines in a row share the rule below.
User-agent: Scrapy
User-agent: wget
User-agent: curl
User-agent: HTTrack
User-agent: python-requests
Disallow: /
# Absolute URL of the XML sitemap. The Sitemap line is not part of any
# User-agent group, so it applies to every crawler that reads this file.
Sitemap: https://tariqkhan.co.uk/sitemap.xml