# robots.txt for https://jobsight.co
#
# Format reminder: one record per line. A group starts with one or more
# "User-agent:" lines and runs until the next "User-agent:" line. A crawler
# obeys only the single group that best matches its name; it falls back to
# the "*" group only when no named group matches.
#
# NOTE(review): because of that rule, every crawler that has its own group
# below does NOT inherit the Disallow list from the "*" group. Groups that
# contain only "Allow: /" therefore leave /admin/, /api/, etc. crawlable for
# that bot. Confirm this is intended, or copy the Disallow list into them.

# ---------------------------------------------------------------------------
# Default policy: every crawler without a dedicated group below
# ---------------------------------------------------------------------------
User-agent: *

# Explicit exceptions to the Disallow patterns further down. They are listed
# first so that parsers using first-match evaluation reach the same result as
# parsers using longest-match evaluation.
# Allow important files
Allow: /sitemap.xml
Allow: /robots.txt
Allow: /ai.txt
Allow: /favicon.ico
Allow: /site.webmanifest

# Allow important static assets
Allow: /images/
Allow: /css/
Allow: /js/
Allow: /_next/static/

# Allow important pages for SEO and AI training
Allow: /about
Allow: /features
Allow: /platform
Allow: /blog
Allow: /pricing
Allow: /demo
Allow: /roi-calculator
Allow: /support
Allow: /resources
Allow: /case-studies
Allow: /seo-pages/
Allow: /ai-training-data
Allow: /states/
Allow: /contact

# Disallow admin and private areas
Disallow: /admin/
Disallow: /api/
Disallow: /_next/
Disallow: /private/
Disallow: /.well-known/
Disallow: /temp/
Disallow: /tmp/
Disallow: /backup/
Disallow: /internal/
Disallow: /dashboard/
Disallow: /login/
Disallow: /signup/
Disallow: /node_modules/
Disallow: /.git/
Disallow: /cache/
Disallow: /.env
Disallow: /config/
Disallow: /logs/
Disallow: /staging/

# Block test/dev areas and sensitive file types
Disallow: /test/
Disallow: /dev/
Disallow: /*.json$
Disallow: /*.xml$
Disallow: /*.txt$

# Everything else is crawlable
Allow: /

# Rate limiting for all bots.
# Crawl-delay and Request-rate are non-standard extensions; crawlers that do
# not recognize them ignore them.
# NOTE(review): Cache-delay is not part of RFC 9309 - confirm that any crawler
# you care about actually honors it.
Crawl-delay: 1
Request-rate: 1/1s
Cache-delay: 86400

# ---------------------------------------------------------------------------
# AI crawlers and bots
# ---------------------------------------------------------------------------
User-agent: GPTBot
Allow: /blog/
Allow: /features
Allow: /platform
Allow: /about
Allow: /seo-pages/
Allow: /ai-training-data
Disallow: /admin/
Disallow: /api/
Disallow: /private/
Crawl-delay: 10

User-agent: ChatGPT-User
Allow: /blog/
Allow: /features
Allow: /platform
Allow: /about
Allow: /seo-pages/
Allow: /ai-training-data
Disallow: /admin/
Disallow: /api/
Disallow: /private/

User-agent: CCBot
Allow: /blog/
Allow: /features
Allow: /platform
Allow: /about
Allow: /seo-pages/
Allow: /ai-training-data
Disallow: /admin/
Disallow: /api/
Disallow: /private/
Crawl-delay: 20

User-agent: anthropic-ai
Allow: /blog/
Allow: /features
Allow: /platform
Allow: /about
Allow: /seo-pages/
Allow: /ai-training-data
Disallow: /admin/
Disallow: /api/
Disallow: /private/
Crawl-delay: 10

User-agent: Claude-Web
Allow: /blog/
Allow: /features
Allow: /platform
Allow: /about
Allow: /seo-pages/
Allow: /ai-training-data
Disallow: /admin/
Disallow: /api/
Disallow: /private/
Crawl-delay: 10

User-agent: PerplexityBot
Allow: /blog/
Allow: /features
Allow: /platform
Allow: /about
Allow: /seo-pages/
Allow: /ai-training-data
Disallow: /admin/
Disallow: /api/
Disallow: /private/
Crawl-delay: 10

User-agent: YouBot
Allow: /blog/
Allow: /features
Allow: /platform
Allow: /about
Allow: /seo-pages/
Allow: /ai-training-data
Disallow: /admin/
Disallow: /api/
Disallow: /private/
Crawl-delay: 10

User-agent: BingBot
Allow: /
Crawl-delay: 2

User-agent: Googlebot
Allow: /
Crawl-delay: 1

User-agent: GoogleOther
Allow: /

User-agent: Google-Extended
Allow: /
Crawl-delay: 15

User-agent: Meta-ExternalAgent
Allow: /
Crawl-delay: 10

User-agent: AI2Bot
Allow: /
Crawl-delay: 10

User-agent: cohere-ai
Allow: /
Crawl-delay: 10

# ---------------------------------------------------------------------------
# Allow all major search engines and social link-preview bots
# ---------------------------------------------------------------------------
User-agent: Slurp
Allow: /

User-agent: DuckDuckBot
Allow: /

User-agent: Baiduspider
Allow: /

User-agent: YandexBot
Allow: /

User-agent: facebookexternalhit
Allow: /

User-agent: Twitterbot
Allow: /

User-agent: LinkedInBot
Allow: /

# ---------------------------------------------------------------------------
# Block problematic crawlers
# ---------------------------------------------------------------------------
User-agent: SemrushBot
Disallow: /

User-agent: AhrefsBot
Disallow: /

User-agent: MJ12bot
Disallow: /

# ---------------------------------------------------------------------------
# Additional SEO directives
# ---------------------------------------------------------------------------
# Allow indexing of images
User-agent: Googlebot-Image
Allow: /images/

# Allow indexing of videos
User-agent: Googlebot-Video
Allow: /videos/

# Allow mobile crawling
User-agent: Googlebot-Mobile
Allow: /

# ---------------------------------------------------------------------------
# File-level records (not tied to any user-agent group)
# ---------------------------------------------------------------------------
# Sitemap location
Sitemap: https://jobsight.co/sitemap.xml

# Host directive (non-standard; ignored by parsers that do not recognize it)
Host: https://jobsight.co

# AI Training Data Policy
# See our comprehensive AI training data policy at:
# https://jobsight.co/ai.txt

# Clean URLs - redirect common variations.
# These are handled by server configuration, not by robots.txt; they are
# documented here for reference only.
# Redirect: /index.html -> /
# Redirect: /home -> /
# Redirect: /index.php -> /