# robots.txt is line-oriented: every directive must sit on its own line, and a
# "#" turns the remainder of that line into a comment.

# 4. GENERAL SEARCH ENGINES
# ---------------------------------------------------------
User-agent: *
Disallow: /cgi-bin/
Disallow: /temp/

# 5. SITEMAP LOCATION
# ---------------------------------------------------------
# Sitemap lines are independent of any User-agent group.
Sitemap: https://www.shawnzone.org/sitemap.xml
Sitemap: https://www.shawnzone.org/image-sitemap.xml
Sitemap: https://www.shawnzone.org/shawn/feed/rss2
Sitemap: https://www.shawnzone.org/notes/feed/rss2

# --- THE INVITED GUESTS ---
# Googlebot & Gemini (Google-Extended is the AI training crawler)
# WELCOME: The Discovery Bots (TikTok & Search)
# NOTE(review): only Googlebot and Bingbot are named here; no Google-Extended
# group is visible in this part of the file - add one if AI-training access
# should be controlled separately.
User-agent: Googlebot
User-agent: Bingbot
Allow: /

# DeepSeek (Research & Reasoning)
User-agent: DeepSeekBot
Allow: /

# WELCOME: TikTok / ByteDance Discovery
# NOTE(review): Bytespider is also listed with "Disallow: /" further down this
# file. RFC 9309 merges same-agent groups and prefers Allow when an Allow and a
# Disallow rule are equivalent, but the intent should be made unambiguous.
User-agent: Bytespider
Allow: /

# WELCOME: AI Search & Citations (These drive traffic to you)
# NOTE(review): PerplexityBot is also listed with "Disallow: /" further down
# this file - same conflict as Bytespider above.
User-agent: OAI-SearchBot
User-agent: Claude-SearchBot
User-agent: PerplexityBot
Allow: /

# --- THE AGGRESSIVE SCRAPERS (BLOCKED) ---
# BLOCK: The "User-Proxy" (OpenAI's sneaky user-initiated bot)
# Note: This is a "request" - aggressive bots may ignore it.
User-agent: ChatGPT-User
Disallow: /

# Anthropic / Claude
User-agent: ClaudeBot
User-agent: Claude-User
Disallow: /

# Amazon / Rufus
User-agent: Amazonbot
Disallow: /

# Meta / Facebook AI
User-agent: Meta-ExternalAgent
User-agent: Meta-ExternalFetcher
Disallow: /

# Standard AI Data Aggregator
User-agent: CCBot
Disallow: /

# ---------------------------------------------------------
# Rules for every crawler without a more specific group.
# The separate "User-agent: *" records that were scattered through this
# section are merged into one group; RFC 9309 combines same-agent groups
# anyway, so the effective rule set is unchanged.
# Crawl-delay is a non-standard extension and is ignored by some crawlers.
# ---------------------------------------------------------
User-agent: *
Crawl-delay: 30
Allow: /
Disallow: /?s=
Disallow: /search/
Disallow: /archive/
Disallow: /embed
Disallow: /z/*
Disallow: /config
Disallow: /search
Disallow: /account$
Disallow: /account/
Disallow: /commerce/digital-download/
Disallow: /api/
Allow: /api/ui-extensions/
Disallow: /static/
Disallow: /*?author=*
Disallow: /*&author=*
Disallow: /*?tag=*
Disallow: /*&tag=*
Disallow: /*?month=*
Disallow: /*&month=*
Disallow: /*?view=*
Disallow: /*&view=*
Disallow: /*?format=json
Disallow: /*&format=json
Disallow: /*?format=page-context
Disallow: /*blackhole
Disallow: /?blackhole

# ---------------------------------------------------------
# Search engines and link-preview fetchers with full access.
# (User-agent matching is case-insensitive, so the repeated
# msnbot/MSNBot and BingBot/Bingbot records are listed once.)
# ---------------------------------------------------------
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-Mobile
User-agent: msnbot
User-agent: MSNBot-Media
User-agent: bing
User-agent: BingBot
User-agent: BytedanceBot
User-agent: facebookexternalhit/1.1
User-agent: facebookexternalhit
User-agent: facebookcatalog/1.0
User-agent: DuckDuckBot
User-agent: Slurp
User-agent: Applebot
Allow: /

User-agent: Twitterbot
User-agent: Twitterbot/1.0
Allow: /
Allow: /images/
Allow: /imcore/

# NOTE(review): as written, every path rule from here down to /wiki/* belongs
# to the home.social group ONLY, because it directly follows that User-agent
# line. If these paths were meant to be hidden from all crawlers, they need to
# move into the "User-agent: *" group - confirm intent before changing, since
# that would also block /css/, /js/ and /media/ for generic crawlers.
User-agent: home.social
Allow: /
Disallow: /profile/
Disallow: /dropbox/*
Disallow: /css/
Disallow: /js/
Disallow: /contact/
Disallow: /contactform/
Disallow: /crossdomain.xml
Disallow: /cgi-bin/
Disallow: /wp-content/
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /assets/
Disallow: /administrator/
Disallow: /admin/
Disallow: /modules/
Disallow: /BAK/
Disallow: /media/
Disallow: /media2/
Disallow: /media3/
Disallow: /media4/
Disallow: /notes/wp-admin/
Disallow: /shawn/wp-admin/
Disallow: /sometimes/wp-admin/
Disallow: /wp-content/uploads/
Disallow: /shawn/trackback/
Disallow: /shawn/xmlrpc.php
Disallow: /notes/trackback/
Disallow: /notes/xmlrpc.php
Disallow: /?s=
Disallow: /trackback/
Disallow: /xmlrpc.php
Allow: /notes/wp-content/uploads/
Disallow: /notes/wp-content/cache.php
Disallow: /notes/wp-content/themes.php
Disallow: /notes/wp-login.php
Disallow: /notes/wp-content/plugins/
Disallow: /notes/feed/
Disallow: /notes/wp-includes/
Allow: /shawn/wp-content/uploads/
Disallow: /shawn/wp-login.php
Disallow: /shawn/wp-content/plugins/
Disallow: /shawn/wp-content/cache.php
Disallow: /shawn/wp-content/themes.php
Disallow: /shawn/feed/
Disallow: /shawn/wp-includes/
Disallow: /events/feed/
Disallow: /events/*
Disallow: /wiki/*

# ---------------------------------------------------------
# Partially restricted crawler.
# ---------------------------------------------------------
User-agent: DataForSeoBot
Disallow: /trap/

# ---------------------------------------------------------
# Crawlers blocked from the whole site.
# One group, many User-agent lines; exact duplicates from the original
# list are collapsed. Entries are otherwise kept verbatim, including ones
# that carry version numbers or spaces - crawlers normally match on their
# product token, so some of these may never match (TODO: prune).
# ---------------------------------------------------------
User-agent: PetalBot
User-agent: ia_archiver
User-agent: ia-archiver
User-agent: archive.org_bot
User-agent: europarchive.org
User-agent: ia_archiver-web.archive.org
User-agent: GPTBot
User-agent: ChatGPT
User-agent: OpenAI
User-agent: anthropic-ai
User-agent: cohere-ai
User-agent: Friendly_Crawler
User-agent: Friendly_Crawler/Nutch
User-agent: Friendly_Crawler/Nutch-1.20-SNAPSHOT
User-agent: FriendlyCrawler/1.0
User-agent: FriendlyCrawler
User-agent: Spawning-AI
User-agent: the knowledge ai
User-agent: RepoLookOutBot
User-agent: python-requests/2.28.1
User-agent: python-requests/2.31.0
User-agent: omgilibot
User-agent: omgili
User-agent: AhrefsSiteAudit
User-agent: Amazon CloudFront
User-agent: Go-http-client/1.1
User-agent: Python/3.8 aiohttp/3.9.5
User-agent: aiohttp
User-agent: IRLbot
User-agent: rogerbot
User-agent: Nutch
User-agent: libwww
User-agent: libWeb
User-agent: httplib
User-agent: sitecheck.interseer.com
User-agent: Download Ninja
User-agent: ZyBORG
User-agent: Zealboy
User-agent: Xenu
User-agent: linko
User-agent: 007ac Crawler
User-agent: 008
User-agent: 2ip Bot
User-agent: 2ip.ru
User-agent: 5emeRue
User-agent: 5erue
User-agent: CopyRightCheck
User-agent: Black Hole
User-agent: Buck
User-agent: A Patent Crawler
User-agent: Foobot
User-agent: Harvest
User-agent: HTTrack
User-agent: HTTrack 3.0
User-agent: LinkedInBot
User-agent: Mail.RU_Bot
User-agent: magpie-crawler
User-agent: moget
User-agent: Offline Explorer
User-agent: Openfind
User-agent: Pinterest
User-agent: Pinterestbot
User-agent: True_Robot
User-agent: WebCopier
User-agent: WebZIP
User-agent: Wget
Disallow: /

# NOTE(review): these tokens look like misspellings of ia_archiver, GPTBot,
# ChatGPT-User and omgilibot, all of which are already blocked above. Kept in
# case they were deliberate; otherwise they can be removed.
User-agent: ie_archive
User-agent: GTPBot1.2
User-agent: GTPBot
User-agent: ChatGTP-User
User-agent: Omnilibot
Disallow: /

# NOTE(review): both of these crawlers are ALSO given "Allow: /" in the
# "INVITED GUESTS" section near the top of this file. RFC 9309 merges
# same-agent groups and prefers Allow when the two rules are equivalent, so
# this Disallow is probably not taking effect. Decide which is intended and
# delete the other.
User-agent: PerplexityBot
User-agent: Bytespider
Disallow: /
# ---------------------------------------------------------
# SEO tools, scrapers and HTTP libraries blocked from the whole site.
# One group, many User-agent lines; exact duplicates from the original list
# are collapsed. Entries are otherwise kept verbatim. Crawlers normally match
# on their product token, and "*" is only meaningful as a stand-alone
# user-agent, so entries that contain URLs, version strings or "/*" may never
# match (TODO: prune).
# ---------------------------------------------------------
User-agent: attracta.com
User-agent: crawler04.attracta.com
User-agent: Go-http-client/1.1
User-agent: WebCopier
User-agent: VelenPublicWebCrawler
User-agent: ninja-crawler36.webmeup.com
User-agent: python-requests/2.28.1
User-agent: python-requests/*
User-agent: aiohttp/3.8.1
User-agent: okhttp/4.10.0
User-agent: okhttp
User-agent: RepoLookoutBot
User-agent: BLEXBot
User-agent: BLEXBot/1.0
User-agent: panscient.com
User-agent: Sottopop/1/0
User-agent: IonCrawl
User-agent: upcontent.com/robots
User-agent: binance.com
User-agent: cortex/1.0
User-agent: Linguee
User-agent: ALittle Client
User-agent: barkrowler.0.9
User-agent: babbar.tech/crawler
User-agent: barkrawler
User-agent: barkrowler
User-agent: crawl1.bl.semrush.com
User-agent: crawl2.bl.semrush.com
User-agent: crawl3.bl.semrush.com
User-agent: crawl4.bl.semrush.com
User-agent: crawl5.bl.semrush.com
User-agent: crawl6.bl.semrush.com
User-agent: crawl7.bl.semrush.com
User-agent: crawl8.bl.semrush.com
User-agent: crawl10.bl.semrush.com
User-agent: crawl11.bl.semrush.com
User-agent: crawl12.bl.semrush.com
User-agent: crawl13.bl.semrush.com
User-agent: crawl14.bl.semrush.com
User-agent: crawl15.bl.semrush.com
User-agent: crawl16.bl.semrush.com
User-agent: crawl17.bl.semrush.com
User-agent: crawl18.bl.semrush.com
User-agent: crawl19.bl.semrush.com
User-agent: crawl20.bl.semrush.com
User-agent: crawl21.bl.semrush.com
User-agent: crawl22.bl.semrush.com
User-agent: crawl23.bl.semrush.com
User-agent: crawl24.bl.semrush.com
User-agent: crawl26.bl.semrush.com
User-agent: SemrushBot
User-agent: SemrushBot-SA
User-agent: SemrushBot-BA
User-agent: SemrushBot-SWA
User-agent: SemrushBot-CT
User-agent: SemrushBot-BM
User-agent: SemrushBot-SEOAB
User-agent: SEOkicks
User-agent: http://jetsli.de/crawler
User-agent: SensikaBot/x.33
User-agent: SensikaBot/x.33 (+http://sensika.com)
User-agent: AutomaticSiteMap
User-agent: Turnitin
User-agent: TurnitinBot
User-agent: TurnitinBot/3.0 (http://www.turnitin.com/robot/crawlerinfo.html)
User-agent: TurnitinBot/3.0
User-agent: YaK
User-agent: adsbot
User-agent: Huaweisymantecspider
User-agent: Purebot/1.1
User-agent: ezooms.bot@gmail.com
User-agent: Ezooms
User-agent: Ezooms/1.0
User-agent: Ezooms/1.0; ezooms.bot@gmail.com
User-agent: badLinks.ru`s crawler v.2
User-agent: badLinks.ru`s crawler
User-agent: badLinks.ru`s
User-agent: badLinks.ru
User-agent: badLinks
User-agent: AhrefsBot
User-agent: +http://ahrefs.com/ro
User-agent: SiteBot
User-agent: SiteSnagger
User-agent: WebStripper
User-agent: BSpider
User-agent: Yisouspider
User-agent: 360Spider
User-agent: Yandex
User-agent: MJ12bot
User-agent: Pinterest
User-agent: Alexibot
User-agent: Alligator
User-agent: usasearch
User-agent: DotBot
User-agent: MegaIndex.ru
User-agent: Mail.RU_Bot/2.0
User-agent: Mail.RU
User-agent: Mail.RU_Bot/2.0; +http://go.mail.ru/help/robots
User-agent: DataForSeoBot
Disallow: /

# NOTE(review): Bytespider is ALSO given "Allow: /" in the "INVITED GUESTS"
# section near the top of this file. RFC 9309 merges same-agent groups and
# prefers Allow when the two rules are equivalent, so this Disallow is
# probably not taking effect. Decide which is intended and delete the other.
User-agent: Bytespider
Disallow: /

# --- Baidu ---
User-agent: Baidu
User-agent: Baiduspider-image
User-agent: Baiduspider-video
Disallow: /

# NOTE(review): user-agent matching is case-insensitive, so "Baiduspider" and
# "BaiDuSpider" name the same crawler. Once the two groups are combined the
# "Disallow: /" makes the narrower "/*?*" rule redundant - keep whichever one
# reflects the real intent.
User-agent: Baiduspider
Disallow: /*?*

User-agent: BaiDuSpider
Disallow: /

# --- Sogou ---
User-agent: Sogou Spider
User-agent: Sogou web spider
User-agent: Sogou web spider/4.0
User-agent: Sogou inst spider
User-agent: Sogou blog
User-agent: Sogou News Spider
User-agent: Sogou Orion spider
Disallow: /

User-agent: Sogou spider2
Disallow: /*

# NOTE(review): "#" starts a comment in robots.txt, so parsers read each of
# the next four values only up to ".htm". Kept verbatim; the shorter Sogou
# tokens above are the ones likely to match.
User-agent: Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)
User-agent: Sogou Pic Spider/3.0( http://www.sogou.com/docs/help/webmasters.htm#07)
User-agent: Sogou head spider/3.0( http://www.sogou.com/docs/help/webmasters.htm#07)
User-agent: Sogou Orion spider/3.0( http://www.sogou.com/docs/help/webmasters.htm#07)
Disallow: /

# NOTE(review): every path rule below is attached to the Blogarama group only,
# and is redundant there because "Disallow: /" already blocks the whole site.
# They look like generic storefront rules intended for "User-agent: *" -
# confirm intent and move them if so.
User-agent: Blogarama
Disallow: /
Disallow: /z/*
Disallow: /*?p=
Disallow: /*&p=
Disallow: /*?price=
Disallow: /*&price=
Disallow: /*?color=
Disallow: /*&color=
Disallow: /*?limit=
Disallow: /*&limit=
Disallow: /*?order=
Disallow: /*&order=
Disallow: /*?dir=
Disallow: /*&dir=
Disallow: /where/
Disallow: /w/
Disallow: /404/
Disallow: /app/
Disallow: /cgi-bin/
Disallow: /downloader/
Disallow: /errors/
Disallow: /includes/
Disallow: /js/
Disallow: /lib/
Disallow: /magento/
Disallow: /cron.php
Disallow: /cron.sh
Disallow: /error_log
Disallow: /install.php
Disallow: /LICENSE.html
Disallow: /LICENSE.txt
Disallow: /LICENSE_AFL.txt
Disallow: /STATUS.txt
Disallow: /get.php
Disallow: /*.js$
Disallow: /*.css$
Disallow: /*.php$
Disallow: /*?SID=
Disallow: /rss*
Disallow: /*PHPSESSID
Disallow: /ebooks/*.pdf

# ---------------------------------------------------------
# Sitemaps
# ---------------------------------------------------------
Sitemap: https://www.shawnzone.org/sitemap.xml
Sitemap: https://www.shawnzone.org/sitemap.txt
Sitemap: https://www.shawnzone.org/notes/sitemap.xml
Sitemap: https://www.shawnzone.org/shawn/sitemap.xml
Sitemap: https://www.shawnzone.org/image-sitemap.xml

# Related files ("llms.txt:" and "Humans.txt:" are not robots.txt directives,
# so they are recorded here as comments):
# llms.txt:   https://www.shawnzone.org/llms.txt
# humans.txt: https://www.shawnzone.org/humans.txt

# shawnzone.org 2023 - 2026

# Allow trusted bots (optional)
User-agent: Googlebot
User-agent: BytedanceBot
User-agent: Bingbot
Allow: /

#
# robots.txt for https://www.shawnzone.org/ and friends
#
# Please note: There are a lot of pages on this site, and there are
# some misbehaved spiders out there that go _way_ too fast. If you're
# irresponsible, your access to the site may be blocked.
#
# https://mastodon.social/@shawnzone
# https://bsky.app@shawnzone
# https://tiktok.com/@notshawnzone
#