diff --git a/core/app/sitemap.xml/route.ts b/core/app/sitemap.xml/route.ts
index 4054284e6c..0d188a4784 100644
--- a/core/app/sitemap.xml/route.ts
+++ b/core/app/sitemap.xml/route.ts
@@ -7,10 +7,94 @@ import { getChannelIdFromLocale } from '~/channels.config';
 import { client } from '~/client';
 import { defaultLocale } from '~/i18n/locales';
 
-export const GET = async () => {
+/**
+ * Serves the sitemap for the storefront.
+ *
+ * With no query params, responds with the sitemap index in which every <loc> pointing at
+ * /xmlsitemap.php (and carrying both type and page) is rewritten to /sitemap.xml on the
+ * requesting host. With both type and page, proxies that upstream sitemap, keeping its status
+ * and forcing an XML content type. Supplying only one of the two params is rejected with a 400.
+ */
+export const GET = async (request: Request) => {
+  const url = new URL(request.url);
+  // NOTE(review): Host is taken from the request as-is and ends up in every rewritten <loc>;
+  // confirm the platform/proxy in front of this route validates it.
+  const incomingHost = request.headers.get('host') ?? url.host;
+  // Chained proxies may send a list ("https,http"); only the first entry is used.
+  const [forwardedProto = ''] = (request.headers.get('x-forwarded-proto') ?? '').split(',');
+  const candidateProto = forwardedProto.trim().toLowerCase();
+  // Only http/https make sense for sitemap links; otherwise use the request URL's own scheme.
+  const incomingProto =
+    candidateProto === 'http' || candidateProto === 'https'
+      ? candidateProto
+      : url.protocol.replace(':', '');
+
+  const type = url.searchParams.get('type');
+  const page = url.searchParams.get('page');
+
+  // If a specific sitemap within the index is requested, require both params
+  if (type !== null || page !== null) {
+    if (!type || !page) {
+      return new Response('Both "type" and "page" query params are required', {
+        status: 400,
+        headers: { 'Content-Type': 'text/plain; charset=utf-8' },
+      });
+    }
+
+    const upstream = await client.fetchSitemapResponse(
+      { type, page },
+      getChannelIdFromLocale(defaultLocale),
+    );
+
+    // Pass-through upstream status/body but enforce XML content-type
+    const body = await upstream.text();
+
+    return new Response(body, {
+      status: upstream.status,
+      statusText: upstream.statusText,
+      headers: { 'Content-Type': 'application/xml' },
+    });
+  }
+
+  // Otherwise, return the sitemap index with normalized internal links
   const sitemapIndex = await client.fetchSitemapIndex(getChannelIdFromLocale(defaultLocale));
 
-  return new Response(sitemapIndex, {
+  const rewritten = sitemapIndex.replace(
+    /<loc>([^<]+)<\/loc>/g,
+    (match: string, locUrlStr: string) => {
+      try {
+        // Decode the XML entity for '&' so URL parsing sees real query separators
+        const decoded: string = locUrlStr.replace(/&amp;/g, '&');
+        const original = new URL(decoded);
+
+        if (!original.pathname.endsWith('/xmlsitemap.php')) {
+          return match;
+        }
+
+        const normalized = new URL(`${incomingProto}://${incomingHost}/sitemap.xml`);
+
+        const t = original.searchParams.get('type');
+        const p = original.searchParams.get('page');
+
+        // Only rewrite entries that include both type and page; otherwise leave untouched
+        if (!t || !p) {
+          return match;
+        }
+
+        normalized.searchParams.set('type', t);
+        normalized.searchParams.set('page', p);
+
+        // Re-encode '&' as an entity so the output stays well-formed XML
+        const normalizedXml: string = normalized.toString().replace(/&/g, '&amp;');
+
+        return `<loc>${normalizedXml}</loc>`;
+      } catch {
+        return match;
+      }
+    },
+  );
+
+  return new Response(rewritten, {
     headers: {
       'Content-Type': 'application/xml',
     },
diff --git a/packages/client/src/client.ts b/packages/client/src/client.ts
index dd25618110..7acf61a156 100644
--- a/packages/client/src/client.ts
+++ b/packages/client/src/client.ts
@@ -210,6 +210,59 @@ class Client {
     return response.text();
   }
 
+  /**
+   * Fetches one sitemap document from the channel's canonical URL and returns its body.
+   *
+   * @param params - Optional `type` and `page` selectors forwarded as query params.
+   * @param channelId - Channel whose canonical URL is queried; falls back to the default channel.
+   * @returns The response body as text.
+   * @throws Error when the upstream response is not ok.
+   */
+  async fetchSitemap(
+    params: { type?: string | null; page?: string | number | null },
+    channelId?: string,
+  ): Promise<string> {
+    const response = await this.fetchSitemapResponse(params, channelId);
+
+    if (!response.ok) {
+      throw new Error(`Unable to get Sitemap: ${response.statusText}`);
+    }
+
+    return response.text();
+  }
+
+  /**
+   * Requests `/xmlsitemap.php` on the channel's canonical URL and returns the raw response,
+   * without checking its status, so callers can pass the upstream status through.
+   *
+   * @param params - `type` is forwarded when truthy, `page` whenever it is not null/undefined.
+   * @param channelId - Channel whose canonical URL is queried; falls back to the default channel.
+   * @returns The unmodified fetch response.
+   */
+  async fetchSitemapResponse(
+    params: { type?: string | null; page?: string | number | null },
+    channelId?: string,
+  ): Promise<Response> {
+    const baseUrl = new URL(`${await this.getCanonicalUrl(channelId)}/xmlsitemap.php`);
+
+    // Only forward well-known params
+    if (params.type) baseUrl.searchParams.set('type', String(params.type));
+    if (params.page !== undefined && params.page !== null)
+      baseUrl.searchParams.set('page', String(params.page));
+
+    const response = await fetch(baseUrl.toString(), {
+      method: 'GET',
+      headers: {
+        Accept: 'application/xml',
+        'Content-Type': 'application/xml',
+        'User-Agent': this.backendUserAgent,
+        ...(this.trustedProxySecret && { 'X-BC-Trusted-Proxy-Secret': this.trustedProxySecret }),
+      },
+    });
+
+    return response;
+  }
+
   private async getCanonicalUrl(channelId?: string) {
     const resolvedChannelId = channelId ?? (await this.getChannelId(this.defaultChannelId));
 