From 662e9e82856cb116978746b0993d53498ee811e3 Mon Sep 17 00:00:00 2001
From: Anas Khan <83116240+anxkhn@users.noreply.github.com>
Date: Mon, 29 Jun 2026 09:49:29 +0530
Subject: [PATCH] fix(utils): prevent sitemap metadata leak across url without
loc
A <url> with no <loc> is dropped, but SitemapXmlParser only reset its url buffer when loc was present, so its lastmod/priority/changefreq bled into the next emitted url. Reset the buffer on every </url>, and only set lastmod when the date is valid so junk values no longer emit Invalid Date.
---
packages/utils/src/internals/sitemap.ts | 11 ++++++---
packages/utils/test/sitemap.test.ts | 33 +++++++++++++++++++++++++
2 files changed, 41 insertions(+), 3 deletions(-)
diff --git a/packages/utils/src/internals/sitemap.ts b/packages/utils/src/internals/sitemap.ts
index 4b4fbada3e80..e44948e4ea5d 100644
--- a/packages/utils/src/internals/sitemap.ts
+++ b/packages/utils/src/internals/sitemap.ts
@@ -136,8 +136,10 @@ class SitemapXmlParser extends Transform {
this.currentTag = undefined;
}
- if (name === 'url' && this.url.loc !== undefined) {
- this.push({ type: 'url', ...this.url, loc: this.url.loc } satisfies SitemapItem);
+ if (name === 'url') {
+ if (this.url.loc !== undefined) {
+ this.push({ type: 'url', ...this.url, loc: this.url.loc } satisfies SitemapItem);
+ }
this.url = {};
}
}
@@ -157,7 +159,10 @@ class SitemapXmlParser extends Transform {
text = text.trim();
if (this.currentTag === 'lastmod') {
- this.url.lastmod = new Date(text);
+ const lastmod = new Date(text);
+ if (!Number.isNaN(lastmod.getTime())) {
+ this.url.lastmod = lastmod;
+ }
}
if (this.currentTag === 'priority') {
diff --git a/packages/utils/test/sitemap.test.ts b/packages/utils/test/sitemap.test.ts
index 6a8545cb00ed..52831ff04396 100644
--- a/packages/utils/test/sitemap.test.ts
+++ b/packages/utils/test/sitemap.test.ts
@@ -423,6 +423,39 @@ describe('Sitemap', () => {
);
});
+ it('does not leak metadata from a url without loc and drops invalid lastmod', async () => {
+ const items: SitemapUrl[] = [];
+
+ for await (const item of parseSitemap([
+ {
+ type: 'raw',
+ content: [
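+ '<?xml version="1.0" encoding="UTF-8"?>',
+ '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
+ '<url>',
+ '<loc>http://not-exists.com/a</loc>',
+ '<lastmod>not-a-date</lastmod>',
+ '</url>',
+ '<url>',
+ '<lastmod>2020-01-01</lastmod>',
+ '<priority>0.5</priority>',
+ '</url>',
+ '<url>',
+ '<loc>http://not-exists.com/c</loc>',
+ '</url>',
+ '</urlset>',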
+ ].join('\n'),
+ },
+ ])) {
+ items.push(item);
+ }
+
+ expect(items.map((item) => item.loc)).toEqual(['http://not-exists.com/a', 'http://not-exists.com/c']);
+ expect(items[0].lastmod).toBeUndefined();
+ expect(items[1].lastmod).toBeUndefined();
+ expect(items[1].priority).toBeUndefined();
+ });
+
it('loads sitemaps that reference other sitemaps from string', async () => {
const sitemap = await Sitemap.fromXmlString(
[