From 662e9e82856cb116978746b0993d53498ee811e3 Mon Sep 17 00:00:00 2001 From: Anas Khan <83116240+anxkhn@users.noreply.github.com> Date: Mon, 29 Jun 2026 09:49:29 +0530 Subject: [PATCH] fix(utils): prevent sitemap metadata leak across url without loc A <url> with no <loc> is dropped, but SitemapXmlParser only reset its url buffer when loc was present, so its lastmod/priority/changefreq bled into the next emitted url. Reset the buffer on every </url>, and only set lastmod when the date is valid so junk values no longer emit Invalid Date. --- packages/utils/src/internals/sitemap.ts | 11 ++++++--- packages/utils/test/sitemap.test.ts | 33 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/packages/utils/src/internals/sitemap.ts b/packages/utils/src/internals/sitemap.ts index 4b4fbada3e80..e44948e4ea5d 100644 --- a/packages/utils/src/internals/sitemap.ts +++ b/packages/utils/src/internals/sitemap.ts @@ -136,8 +136,10 @@ class SitemapXmlParser extends Transform { this.currentTag = undefined; } - if (name === 'url' && this.url.loc !== undefined) { - this.push({ type: 'url', ...this.url, loc: this.url.loc } satisfies SitemapItem); + if (name === 'url') { + if (this.url.loc !== undefined) { + this.push({ type: 'url', ...this.url, loc: this.url.loc } satisfies SitemapItem); + } this.url = {}; } } @@ -157,7 +159,10 @@ class SitemapXmlParser extends Transform { text = text.trim(); if (this.currentTag === 'lastmod') { - this.url.lastmod = new Date(text); + const lastmod = new Date(text); + if (!Number.isNaN(lastmod.getTime())) { + this.url.lastmod = lastmod; + } } if (this.currentTag === 'priority') { diff --git a/packages/utils/test/sitemap.test.ts b/packages/utils/test/sitemap.test.ts index 6a8545cb00ed..52831ff04396 100644 --- a/packages/utils/test/sitemap.test.ts +++ b/packages/utils/test/sitemap.test.ts @@ -423,6 +423,39 @@ describe('Sitemap', () => { ); }); + it('does not leak metadata from a url without loc and drops invalid lastmod', async () => { + 
const items: SitemapUrl[] = []; + + for await (const item of parseSitemap([ + { + type: 'raw', + content: [ + '<?xml version="1.0" encoding="UTF-8"?>', + '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">', + '<url>', + '<loc>http://not-exists.com/a</loc>', + '<lastmod>not-a-date</lastmod>', + '</url>', + '<url>', + '<lastmod>2020-01-01</lastmod>', + '<priority>0.5</priority>', + '</url>', + '<url>', + '<loc>http://not-exists.com/c</loc>', + '</url>', + '</urlset>', + ].join('\n'), + }, + ])) { + items.push(item); + } + + expect(items.map((item) => item.loc)).toEqual(['http://not-exists.com/a', 'http://not-exists.com/c']); + expect(items[0].lastmod).toBeUndefined(); + expect(items[1].lastmod).toBeUndefined(); + expect(items[1].priority).toBeUndefined(); + }); + it('loads sitemaps that reference other sitemaps from string', async () => { const sitemap = await Sitemap.fromXmlString( [