# https://www.robotstxt.org/robotstxt.html
#
# robots.txt is line-oriented: exactly one directive per line, and "#" starts
# a comment that runs to the end of that line. Keep every rule on its own line.
#
# A crawler obeys only the single most specific User-agent group that matches
# it; rules in the "*" group are NOT inherited by the named groups below.

# Allow all legitimate crawlers to access public content
User-agent: *
Allow: /

# Disallow API routes and internal paths
# NOTE(review): /_next/ presumably also serves the JS/CSS bundles the pages
# load; blocking it can stop crawlers from rendering pages - confirm intended.
Disallow: /api/
Disallow: /_next/
Disallow: /_vercel/

# Disallow images and media assets
Disallow: /logos/
Disallow: /certifications/
Disallow: /references/
Disallow: /images/
Disallow: /videos/
Disallow: /*.jpg$
Disallow: /*.jpeg$
Disallow: /*.png$
Disallow: /*.gif$
Disallow: /*.webp$
Disallow: /*.svg$
Disallow: /*.ico$
Disallow: /*.mp4$
Disallow: /*.webm$

# Disallow authentication pages (not useful for search)
Disallow: /*/sign-in/
Disallow: /*/sign-up/
Disallow: /*/dashboard/

# Disallow query strings that create duplicate content
Disallow: /*?*

# Crawl-delay for polite crawling (optional, respected by some bots)
Crawl-delay: 1

# Google (no crawl-delay support, but explicit allow)
User-agent: Googlebot
Allow: /
Disallow: /api/
Disallow: /_next/
Disallow: /logos/
Disallow: /certifications/
Disallow: /references/
Disallow: /images/
Disallow: /videos/
Disallow: /*/sign-in/
Disallow: /*/sign-up/
Disallow: /*/dashboard/

# Block Google Images crawler entirely
User-agent: Googlebot-Image
Disallow: /

# Block Bing preview (page snapshot) and media crawlers
User-agent: BingPreview
Disallow: /

User-agent: msnbot-media
Disallow: /

# Block Yandex Images crawler
User-agent: YandexImages
Disallow: /

# Bing
User-agent: Bingbot
Allow: /
Disallow: /api/
Disallow: /_next/
Disallow: /*/sign-in/
Disallow: /*/sign-up/
Disallow: /*/dashboard/
Crawl-delay: 1

# DuckDuckGo
User-agent: DuckDuckBot
Allow: /
Disallow: /api/
Disallow: /_next/
Disallow: /*/sign-in/
Disallow: /*/sign-up/
Disallow: /*/dashboard/

# Yahoo/Slurp
User-agent: Slurp
Allow: /
Disallow: /api/
Disallow: /_next/
Disallow: /*/sign-in/
Disallow: /*/sign-up/
Disallow: /*/dashboard/
Crawl-delay: 1

# Yandex
User-agent: Yandex
Allow: /
Disallow: /api/
Disallow: /_next/
Disallow: /*/sign-in/
Disallow: /*/sign-up/
Disallow: /*/dashboard/
Crawl-delay: 1

# Baidu
User-agent: Baiduspider
Allow: /
Disallow: /api/
Disallow: /_next/
Disallow: /*/sign-in/
Disallow: /*/sign-up/
Disallow: /*/dashboard/
Crawl-delay: 2

# Facebook crawler
User-agent: facebookexternalhit
Allow: /

# Twitter crawler
User-agent: Twitterbot
Allow: /

# LinkedIn crawler
User-agent: LinkedInBot
Allow: /

# Block AI training crawlers
User-agent: GPTBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: Omgili
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: PerplexityBot
Disallow: /

# Block aggressive/problematic crawlers
User-agent: AhrefsBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: BLEXBot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: MegaIndex
Disallow: /

User-agent: SEOkicks
Disallow: /

User-agent: serpstatbot
Disallow: /

User-agent: DataForSeoBot
Disallow: /

# Block scraper bots
User-agent: Scrapy
Disallow: /

User-agent: wget
Disallow: /

User-agent: curl
Disallow: /

User-agent: HTTrack
Disallow: /

User-agent: python-requests
Disallow: /

# Sitemap location
Sitemap: https://tariqkhan.co.uk/sitemap.xml