From 45a6c8b90d39d12d68111f5b510b4fcc84d6edde Mon Sep 17 00:00:00 2001 From: ImJustChew Date: Mon, 27 Apr 2026 17:29:28 +0800 Subject: [PATCH] fix(seo): fix canonical URLs, hreflang, soft-404s, and noindex for private pages - worker.ts: generate both zh and en course URLs as separate sitemap entries so Google indexes both language versions instead of treating en as alternates - worker.ts: inject correct hreflang links (zh-TW/en/x-default) for course and bus pages served to bots, fixing the static root hreflang in index.html - worker.ts: return 404 status + noindex meta + X-Robots-Tag for missing courses to eliminate 1321 soft-404 pages (was returning 200 with generic shell) - worker.ts: add handleGenericBotPage() that sets correct canonical and hreflang for all other lang-prefixed bot requests (strips query params from canonical) - worker.ts: add bus route URLs (main/nanda, zh+en) to dynamic sitemap - worker.ts: add calendar, sports-venues, chat, shops, apps to static pages list - CourseDetailsContainer: fix hardcoded /zh/ canonical to use lang prop so en course pages self-canonicalize correctly for client-side rendering - CourseDetailsContainer: add hreflang alternates in course Helmet so bots that render JS see correct language relationships - CourseDetailsContainer: add noindex Helmet to 404/error state - router.tsx: add noindex handle to settings, student/*, next-steps, waitlist, design-system routes (already blocked by robots.txt, belt-and-suspenders) - sitemap.xml: add bus routes, calendar, sports-venues, chat, shops, apps pages Co-Authored-By: Claude Sonnet 4.6 --- apps/web/public/sitemap.xml | 91 +++++-- .../CourseDetails/CourseDetailsContainer.tsx | 45 +++- apps/web/src/router.tsx | 14 +- apps/web/worker.ts | 248 +++++++++++++++--- 4 files changed, 321 insertions(+), 77 deletions(-) diff --git a/apps/web/public/sitemap.xml b/apps/web/public/sitemap.xml index 61231cc4..2cb37d96 100644 --- a/apps/web/public/sitemap.xml +++ b/apps/web/public/sitemap.xml @@ -6,10 +6,13 @@ xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"> + + + https://nthumods.com/zh/courses - 2026-04-21 + 2026-04-27 daily 1.00 @@ -18,7 +21,7 @@ https://nthumods.com/en/courses - 2026-04-21 + 2026-04-27 daily 1.00 @@ -29,7 +32,7 @@ https://nthumods.com/zh/timetable - 2026-04-21 + 2026-04-27 weekly 0.90 @@ -38,7 +41,7 @@ https://nthumods.com/en/timetable - 2026-04-21 + 2026-04-27 weekly 0.90 @@ -49,7 +52,7 @@ https://nthumods.com/zh/today - 2026-04-21 + 2026-04-27 daily 0.90 @@ -58,7 +61,7 @@ https://nthumods.com/en/today - 2026-04-21 + 2026-04-27 daily 0.90 @@ -67,7 +70,7 @@ https://nthumods.com/zh/calendar - 2026-04-21 + 2026-04-27 weekly 0.85 @@ -76,7 +79,7 @@ https://nthumods.com/en/calendar - 2026-04-21 + 2026-04-27 weekly 0.85 @@ -87,7 +90,7 @@ https://nthumods.com/zh/bus - 2026-04-21 + 2026-04-27 weekly 0.85 @@ -96,18 +99,54 @@ https://nthumods.com/en/bus - 2026-04-21 + 2026-04-27 weekly 0.85 + + https://nthumods.com/zh/bus/main + 2026-04-27 + weekly + 0.75 + + + + + + https://nthumods.com/en/bus/main + 2026-04-27 + weekly + 0.75 + + + + + + https://nthumods.com/zh/bus/nanda + 2026-04-27 + weekly + 0.75 + + + + + + https://nthumods.com/en/bus/nanda + 2026-04-27 + weekly + 0.75 + + + + https://nthumods.com/zh/venues - 2026-04-21 + 2026-04-27 monthly 0.80 @@ -116,7 +155,7 @@ https://nthumods.com/en/venues - 2026-04-21 + 2026-04-27 monthly 0.80 @@ -127,7 +166,7 @@ https://nthumods.com/zh/sports-venues - 2026-04-21 + 2026-04-27 weekly 0.80 @@ -136,7 +175,7 @@ https://nthumods.com/en/sports-venues - 2026-04-21 + 2026-04-27 weekly 0.80 @@ -147,7 +186,7 @@ https://nthumods.com/zh/chat - 2026-04-21 + 2026-04-27 monthly 0.75 @@ -156,7 +195,7 @@ https://nthumods.com/en/chat - 2026-04-21 + 2026-04-27 monthly 0.75 @@ -167,7 +206,7 @@ https://nthumods.com/zh/shops - 2026-04-21 + 2026-04-27 weekly 0.70 @@ -176,7 +215,7 @@ https://nthumods.com/en/shops - 2026-04-21 + 2026-04-27 weekly 0.70 @@ -187,7 +226,7 @@ https://nthumods.com/zh/apps - 2026-04-21 + 2026-04-27 monthly 0.70 @@ -196,7 +235,7 @@ https://nthumods.com/en/apps - 2026-04-21 + 2026-04-27 monthly 0.70 @@ -207,7 +246,7 @@ https://nthumods.com/zh/team - 2026-04-21 + 2026-04-27 monthly 0.60 @@ -216,7 +255,7 @@ https://nthumods.com/en/team - 2026-04-21 + 2026-04-27 monthly 0.60 @@ -227,7 +266,7 @@ https://nthumods.com/zh/contribute - 2026-04-21 + 2026-04-27 monthly 0.60 @@ -236,7 +275,7 @@ https://nthumods.com/en/contribute - 2026-04-21 + 2026-04-27 monthly 0.60 @@ -247,7 +286,7 @@ https://nthumods.com/zh/privacy-policy - 2026-04-21 + 2026-04-27 yearly 0.40 @@ -256,7 +295,7 @@ https://nthumods.com/en/privacy-policy - 2026-04-21 + 2026-04-27 yearly 0.40 diff --git a/apps/web/src/components/CourseDetails/CourseDetailsContainer.tsx b/apps/web/src/components/CourseDetails/CourseDetailsContainer.tsx index 0813f045..153f8299 100644 --- a/apps/web/src/components/CourseDetails/CourseDetailsContainer.tsx +++ b/apps/web/src/components/CourseDetails/CourseDetailsContainer.tsx @@ -167,7 +167,7 @@ const CourseDetailContainer = ({ courseCode: `${course.department} ${course.course}-${course.class}`, educationalLevel: "University", inLanguage: "zh-TW", - url: `https://nthumods.com/zh/courses/${course.raw_id}`, + url: `https://nthumods.com/${lang}/courses/${course.raw_id}`, offers: { "@type": "Offer", price: "0", @@ -227,18 +227,24 @@ const CourseDetailContainer = ({ // Handle error state if (error || !course) { return ( -
-
-

404

-

找不到課程

+ <> + + + + +
+
+

404

+

找不到課程

- - - + + + +
-
+ ); } @@ -266,6 +272,21 @@ const CourseDetailContainer = ({ /> + + + , + handle: { noindex: true }, }, { path: "offline", @@ -393,7 +399,11 @@ export const router = createBrowserRouter([ { path: "design-system", element: , - handle: { title: "Design System", titleZh: "設計系統" }, + handle: { + title: "Design System", + titleZh: "設計系統", + noindex: true, + }, }, { path: "*", @@ -404,7 +414,7 @@ export const router = createBrowserRouter([ { path: "waitlist", element: , - handle: { title: "Waitlist", titleZh: "候補" }, + handle: { title: "Waitlist", titleZh: "候補", noindex: true }, }, ], }, diff --git a/apps/web/worker.ts b/apps/web/worker.ts index a72a57b3..1052f827 100644 --- a/apps/web/worker.ts +++ b/apps/web/worker.ts @@ -58,6 +58,8 @@ interface CourseMetaData { ogTitle: string; ogDescription: string; canonicalUrl: string; + zhUrl: string; + enUrl: string; ogType: string; } @@ -85,7 +87,10 @@ function buildCourseMetaData(course: any, lang: string): CourseMetaData { ? `${course.name_zh} - ${course.department} | NTHUMods` : `${course.name_en} - ${course.department} | NTHUMods`; - const canonicalUrl = `https://nthumods.com/${lang}/courses/${encodeURIComponent(course.raw_id)}`; + const courseId = encodeURIComponent(course.raw_id); + const canonicalUrl = `https://nthumods.com/${lang}/courses/${courseId}`; + const zhUrl = `https://nthumods.com/zh/courses/${courseId}`; + const enUrl = `https://nthumods.com/en/courses/${courseId}`; return { title, @@ -93,32 +98,54 @@ function buildCourseMetaData(course: any, lang: string): CourseMetaData { ogTitle, ogDescription: description, canonicalUrl, + zhUrl, + enUrl, ogType: "article", }; } +function applyHreflang( + rewriter: HTMLRewriter, + zhUrl: string, + enUrl: string, + xDefaultUrl: string, +): HTMLRewriter { + return rewriter.on('link[rel="alternate"]', { + element(el) { + const hreflang = el.getAttribute("hreflang"); + if (!hreflang) return; + if (hreflang === "zh" || hreflang === "zh-TW") { + el.setAttribute("hreflang", "zh-TW"); + el.setAttribute("href", zhUrl); + } else if (hreflang === "en") { + el.setAttribute("href", enUrl); + } else if (hreflang === "x-default") { + el.setAttribute("href", xDefaultUrl); + } + }, + }); +} + async function handleCourseDetailPage( lang: string, courseId: string, env: Env, origin: string, ): Promise { - const fallback = () => env.ASSETS.fetch(new Request(`${origin}/index.html`)); - try { const apiRes = await fetch( `https://api.nthumods.com/course/${encodeURIComponent(courseId)}`, { cf: { cacheTtl: 86400, cacheEverything: true } } as RequestInit, ); - if (!apiRes.ok) { - return fallback(); + if (!apiRes.ok || apiRes.status === 404) { + return handleMissingCourse(lang, env, origin); } const course = (await apiRes.json()) as any; if (!course?.name_zh) { - return fallback(); + return handleMissingCourse(lang, env, origin); } const meta = buildCourseMetaData(course, lang); @@ -126,7 +153,7 @@ async function handleCourseDetailPage( new Request(`${origin}/index.html`), ); - return new HTMLRewriter() + let rewriter = new HTMLRewriter() .on("title", { element(el) { el.setInnerContent(meta.title); @@ -171,18 +198,51 @@ async function handleCourseDetailPage( element(el) { el.setAttribute("href", meta.canonicalUrl); }, - }) - .transform(shellRes); + }); + + rewriter = applyHreflang(rewriter, meta.zhUrl, meta.enUrl, meta.zhUrl); + + return rewriter.transform(shellRes); } catch { - return fallback(); + return handleMissingCourse(lang, env, origin); } } +async function handleMissingCourse( + lang: string, + env: Env, + origin: string, +): Promise { + const shellRes = await env.ASSETS.fetch(new Request(`${origin}/index.html`)); + const coursesUrl = `https://nthumods.com/${lang}/courses`; + + const notFoundShell = new Response(shellRes.body, { + status: 404, + statusText: "Not Found", + headers: new Headers({ + ...Object.fromEntries(shellRes.headers), + "X-Robots-Tag": "noindex, follow", + }), + }); + + return new HTMLRewriter() + .on('meta[name="robots"]', { + element(el) { + el.setAttribute("content", "noindex, follow"); + }, + }) + .on('link[rel="canonical"]', { + element(el) { + el.setAttribute("href", coursesUrl); + }, + }) + .transform(notFoundShell); +} + async function handleDepartmentPage(url: URL, env: Env): Promise { const dept = url.searchParams.get("department") ?? ""; const lang = url.pathname.includes("/zh/") ? "zh" : "en"; - const fallback = () => - env.ASSETS.fetch(new Request(`${url.origin}/index.html`)); + const fallback = () => handleGenericBotPage(url, env); try { const apiRes = await supabaseFetch( @@ -213,12 +273,14 @@ async function handleDepartmentPage(url: URL, env: Env): Promise { : `${first3.map((c) => c.name_en ?? c.name_zh).join(", ")}... and ${count} more courses.`; const canonicalUrl = `https://nthumods.com/${lang}/courses?department=${encodeURIComponent(dept)}`; + const zhUrl = `https://nthumods.com/zh/courses?department=${encodeURIComponent(dept)}`; + const enUrl = `https://nthumods.com/en/courses?department=${encodeURIComponent(dept)}`; const shellRes = await env.ASSETS.fetch( new Request(`${url.origin}/index.html`), ); - return new HTMLRewriter() + let rewriter = new HTMLRewriter() .on("title", { element(el) { el.setInnerContent(title); @@ -253,8 +315,11 @@ async function handleDepartmentPage(url: URL, env: Env): Promise { element(el) { el.setAttribute("href", canonicalUrl); }, - }) - .transform(shellRes); + }); + + rewriter = applyHreflang(rewriter, zhUrl, enUrl, zhUrl); + + return rewriter.transform(shellRes); } catch { return fallback(); } @@ -288,12 +353,14 @@ async function handleBusPage( : `View NTHU ${routeName.en} real-time schedule, route map, and stop information.`; const canonicalUrl = `https://nthumods.com/${lang}/bus/${route}`; + const zhUrl = `https://nthumods.com/zh/bus/${route}`; + const enUrl = `https://nthumods.com/en/bus/${route}`; const shellRes = await env.ASSETS.fetch( new Request(`${origin}/index.html`), ); - return new HTMLRewriter() + let rewriter = new HTMLRewriter() .on("title", { element(el) { el.setInnerContent(title); @@ -328,13 +395,43 @@ async function handleBusPage( element(el) { el.setAttribute("href", canonicalUrl); }, - }) - .transform(shellRes); + }); + + rewriter = applyHreflang(rewriter, zhUrl, enUrl, zhUrl); + + return rewriter.transform(shellRes); } catch { return fallback(); } } +// Generic handler for all other bot page requests: sets canonical and hreflang +// based on the current URL (strips query params from canonical for non-dept pages) +async function handleGenericBotPage(url: URL, env: Env): Promise { + const pathname = url.pathname; + // Canonical strips query params unless it's a department filter + const canonicalUrl = `https://nthumods.com${pathname}`; + + const zhPath = pathname.replace(/^\/(zh|en)\//, "/zh/"); + const enPath = pathname.replace(/^\/(zh|en)\//, "/en/"); + const zhUrl = `https://nthumods.com${zhPath}`; + const enUrl = `https://nthumods.com${enPath}`; + + const shellRes = await env.ASSETS.fetch( + new Request(`${url.origin}/index.html`), + ); + + let rewriter = new HTMLRewriter().on('link[rel="canonical"]', { + element(el) { + el.setAttribute("href", canonicalUrl); + }, + }); + + rewriter = applyHreflang(rewriter, zhUrl, enUrl, zhUrl); + + return rewriter.transform(shellRes); +} + const FALLBACK_STATIC_SITEMAP = ` https://nthumods.com/zh/weekly1.00 @@ -361,35 +458,81 @@ function buildSitemapXML(courses: any[]): string { { path: "/en/timetable", priority: "0.90", changefreq: "weekly" }, { path: "/zh/today", priority: "0.85", changefreq: "daily" }, { path: "/en/today", priority: "0.85", changefreq: "daily" }, + { path: "/zh/calendar", priority: "0.85", changefreq: "weekly" }, + { path: "/en/calendar", priority: "0.85", changefreq: "weekly" }, { path: "/zh/bus", priority: "0.80", changefreq: "daily" }, { path: "/en/bus", priority: "0.80", changefreq: "daily" }, + { path: "/zh/bus/main", priority: "0.75", changefreq: "weekly" }, + { path: "/en/bus/main", priority: "0.75", changefreq: "weekly" }, + { path: "/zh/bus/nanda", priority: "0.75", changefreq: "weekly" }, + { path: "/en/bus/nanda", priority: "0.75", changefreq: "weekly" }, { path: "/zh/venues", priority: "0.70", changefreq: "weekly" }, { path: "/en/venues", priority: "0.70", changefreq: "weekly" }, + { path: "/zh/sports-venues", priority: "0.70", changefreq: "weekly" }, + { path: "/en/sports-venues", priority: "0.70", changefreq: "weekly" }, + { path: "/zh/chat", priority: "0.65", changefreq: "monthly" }, + { path: "/en/chat", priority: "0.65", changefreq: "monthly" }, + { path: "/zh/shops", priority: "0.65", changefreq: "weekly" }, + { path: "/en/shops", priority: "0.65", changefreq: "weekly" }, + { path: "/zh/apps", priority: "0.65", changefreq: "monthly" }, + { path: "/en/apps", priority: "0.65", changefreq: "monthly" }, { path: "/zh/team", priority: "0.50", changefreq: "monthly" }, { path: "/en/team", priority: "0.50", changefreq: "monthly" }, { path: "/zh/contribute", priority: "0.50", changefreq: "monthly" }, { path: "/en/contribute", priority: "0.50", changefreq: "monthly" }, + { path: "/zh/privacy-policy", priority: "0.40", changefreq: "yearly" }, + { path: "/en/privacy-policy", priority: "0.40", changefreq: "yearly" }, ]; const staticUrls = staticPages - .map( - (p) => - ` https://nthumods.com${p.path}${p.changefreq}${p.priority}`, - ) + .map((p) => { + const zhPath = p.path + .replace(/^\/(zh|en)\//, "/zh/") + .replace(/^\/(zh|en)$/, "/zh"); + const enPath = p.path + .replace(/^\/(zh|en)\//, "/en/") + .replace(/^\/(zh|en)$/, "/en"); + const zhUrl = `https://nthumods.com${zhPath}`; + const enUrl = `https://nthumods.com${enPath}`; + const loc = `https://nthumods.com${p.path}`; + return ` + ${loc} + ${today} + ${p.changefreq} + ${p.priority} + + + + `; + }) .join("\n"); + // Each course gets BOTH zh and en URL entries so Google indexes both language versions const courseUrls = courses - .map((course) => { + .flatMap((course) => { const courseId = encodeURIComponent(course.raw_id); - return ` - https://nthumods.com/zh/courses/${courseId} + const zhUrl = `https://nthumods.com/zh/courses/${courseId}`; + const enUrl = `https://nthumods.com/en/courses/${courseId}`; + return [ + ` + ${zhUrl} ${today} - weekly - 0.80 - - - - `; + monthly + 0.75 + + + + `, + ` + ${enUrl} + ${today} + monthly + 0.70 + + + + `, + ]; }) .join("\n"); @@ -407,14 +550,36 @@ function buildSitemapXML(courses: any[]): string { ]; const deptUrls = majorDepts - .map( - (dept) => - ` https://nthumods.com/zh/courses?department=${encodeURIComponent(dept)}weekly0.70`, - ) + .flatMap((dept) => { + const zhUrl = `https://nthumods.com/zh/courses?department=${encodeURIComponent(dept)}`; + const enUrl = `https://nthumods.com/en/courses?department=${encodeURIComponent(dept)}`; + return [ + ` + ${zhUrl} + weekly + 0.65 + + + + `, + ` + ${enUrl} + weekly + 0.60 + + + + `, + ]; + }) .join("\n"); return ` - + ${staticUrls} ${courseUrls} ${deptUrls} @@ -466,7 +631,7 @@ async function generateSitemap(env: Env): Promise { const xml = buildSitemapXML(courses); const response = new Response(xml, { headers: { - "Content-Type": "application/xml", + "Content-Type": "application/xml; charset=utf-8", "Cache-Control": "public, max-age=86400", }, }); @@ -492,6 +657,7 @@ export default { return env.ASSETS.fetch(request); } + // Course detail page const courseMatch = url.pathname.match(/^\/(zh|en)\/courses\/(.+)$/); if (courseMatch) { return handleCourseDetailPage( @@ -502,6 +668,7 @@ export default { ); } + // Department-filtered course list const deptMatch = url.pathname.match(/^\/(zh|en)\/courses$/) && url.searchParams.has("department"); @@ -509,11 +676,18 @@ export default { return handleDepartmentPage(url, env); } + // Bus route/line pages const busMatch = url.pathname.match(/^\/(zh|en)\/bus\/(.+)$/); if (busMatch) { return handleBusPage(busMatch[1], busMatch[2], env, url.origin); } + // All other bot requests to lang-prefixed pages: fix canonical and hreflang + const langPageMatch = url.pathname.match(/^\/(zh|en)(\/|$)/); + if (langPageMatch) { + return handleGenericBotPage(url, env); + } + return env.ASSETS.fetch(request); }, };