diff --git a/crates/common/src/auction/formats.rs b/crates/common/src/auction/formats.rs
index 3eb4d843..71347027 100644
--- a/crates/common/src/auction/formats.rs
+++ b/crates/common/src/auction/formats.rs
@@ -217,15 +217,17 @@ pub fn convert_to_openrtb_response(
})
})?;
- // Process creative HTML if present - rewrite URLs and return inline
+ // Process creative HTML if present — sanitize dangerous markup first, then rewrite URLs.
let creative_html = if let Some(ref raw_creative) = bid.creative {
- // Rewrite creative HTML with proxy URLs for first-party delivery
- let rewritten = creative::rewrite_creative_html(settings, raw_creative);
+ let sanitized = creative::sanitize_creative_html(raw_creative);
+ let rewritten = creative::rewrite_creative_html(settings, &sanitized);
log::debug!(
- "Rewritten creative for auction {} slot {} ({} bytes)",
+ "Processed creative for auction {} slot {} ({} → {} → {} bytes)",
auction_request.id,
slot_id,
+ raw_creative.len(),
+ sanitized.len(),
rewritten.len()
);
diff --git a/crates/common/src/creative.rs b/crates/common/src/creative.rs
index 63db7e6f..45162d8c 100644
--- a/crates/common/src/creative.rs
+++ b/crates/common/src/creative.rs
@@ -303,6 +303,198 @@ pub fn rewrite_css_body(settings: &Settings, css: &str) -> String {
rewrite_style_urls(settings, css)
}
+/// Maximum byte length of creative HTML accepted by [`sanitize_creative_html`].
+///
+/// Inputs larger than this are rejected (empty string returned) to prevent unbounded
+/// allocations on the hot path. Fastly Compute enforces upstream request-body limits,
+/// but this guard protects internal callers too.
+const MAX_CREATIVE_SIZE: usize = 1024 * 1024; // 1 MiB = 1_048_576 bytes
+
+/// Returns `true` if a lowercased `data:` URI points to a safe, non-executable MIME type.
+///
+/// Only well-known raster image formats are allowed. `data:image/svg+xml` is **excluded**
+/// because SVG documents can contain `"#;
+ let out = sanitize_creative_html(html);
+ assert!(!out.contains("">"#;
+ let out = sanitize_creative_html(html);
+ assert!(!out.contains("data:text/html"), "should strip data: src");
+ }
+
+ #[test]
+ fn sanitize_strips_dangerous_data_src_attribute() {
+ // data-src is used by lazy-loaders; dangerous URI schemes must be stripped.
+ let html = r#""#;
+ let out = sanitize_creative_html(html);
+ assert!(
+ !out.contains("javascript:"),
+ "should strip javascript: in data-src"
+ );
+ }
+
+ #[test]
+ fn sanitize_strips_dangerous_srcset_leading_entry() {
+ // A javascript: URI at the start of srcset must be stripped.
+ let html =
+ r#""#;
+ let out = sanitize_creative_html(html);
+ assert!(
+ !out.contains("srcset"),
+ "should remove srcset with leading dangerous URL"
+ );
+ assert!(
+ !out.contains("javascript:"),
+ "should strip javascript: from srcset"
+ );
+ }
+
+ #[test]
+ fn sanitize_strips_dangerous_srcset_non_leading_entry() {
+ // A javascript: URI that is NOT the first entry must also be stripped.
+ // This was the gap in the previous starts_with-only check.
+ let html =
+ r#""#;
+ let out = sanitize_creative_html(html);
+ assert!(
+ !out.contains("srcset"),
+ "should remove srcset with non-leading dangerous URL"
+ );
+ assert!(
+ !out.contains("javascript:"),
+ "should strip javascript: from non-leading srcset entry"
+ );
+ }
+
+ #[test]
+ fn sanitize_preserves_safe_srcset() {
+ // A fully safe srcset must be preserved.
+ let html = r#""#;
+ let out = sanitize_creative_html(html);
+ assert!(out.contains("srcset"), "should preserve safe srcset");
+ assert!(
+ out.contains("small.png"),
+ "should preserve first srcset URL"
+ );
+ assert!(
+ out.contains("large.png"),
+ "should preserve second srcset URL"
+ );
+ }
+
+ #[test]
+ fn sanitize_strips_dangerous_imagesrcset_on_source() {
+ // <source> is not in the element removal list, so imagesrcset must be
+ // sanitized by the attribute handler. <link imagesrcset> is already
+ // covered by link removal, but <source> is not.
+ let html = r#""#;
+ let out = sanitize_creative_html(html);
+ assert!(
+ !out.contains("imagesrcset"),
+ "should strip dangerous imagesrcset attribute"
+ );
+ assert!(
+ !out.contains("javascript:"),
+ "should not contain javascript: after stripping imagesrcset"
+ );
+ assert!(
+ out.contains(""#;
+ let out = sanitize_creative_html(html);
+ assert!(
+ !out.contains("imagesrcset"),
+ "should remove imagesrcset with non-leading dangerous URL"
+ );
+ }
+
+ #[test]
+ fn sanitize_preserves_safe_imagesrcset() {
+ let html = r#""#;
+ let out = sanitize_creative_html(html);
+ assert!(
+ out.contains("imagesrcset"),
+ "should preserve safe imagesrcset"
+ );
+ assert!(
+ out.contains("img-1x.png"),
+ "should preserve first candidate"
+ );
+ assert!(
+ out.contains("img-2x.png"),
+ "should preserve second candidate"
+ );
+ }
+
+ #[test]
+ fn sanitize_strips_data_svg_imagesrcset() {
+ // data:image/svg+xml can embed script — must be rejected even though it
+ // starts with "data:image/". Mirrors sanitize_strips_data_svg_src coverage
+ // for imagesrcset.
+ let html = r#""#;
+ let out = sanitize_creative_html(html);
+ assert!(
+ !out.contains("imagesrcset"),
+ "should strip data:image/svg imagesrcset"
+ );
+ assert!(
+ !out.contains("data:image/svg"),
+ "should not contain svg data URI after stripping"
+ );
+ }
+
+ #[test]
+ fn sanitize_strips_dangerous_inline_style() {
+ let html = r#"
ad
"#;
+ let out = sanitize_creative_html(html);
+ assert!(
+ !out.contains("expression("),
+ "should strip expression() in style"
+ );
+ assert!(out.contains("ad"), "should preserve element content");
+ }
+
+ #[test]
+ fn sanitize_strips_javascript_in_style() {
+ let html = r#"
ad
"#;
+ let out = sanitize_creative_html(html);
+ assert!(
+ !out.contains("javascript:"),
+ "should strip javascript: in style"
+ );
+ }
+
+ #[test]
+ fn sanitize_preserves_safe_inline_style() {
+ let html = r#"
styled ad
"#;
+ let out = sanitize_creative_html(html);
+ assert!(out.contains("style="), "should preserve safe inline style");
+ assert!(out.contains("color:red"), "should preserve style value");
+ }
+
+ #[test]
+ fn sanitize_preserves_mailto_href() {
+ let html = r#"email"#;
+ let out = sanitize_creative_html(html);
+ assert!(
+ out.contains("mailto:contact@example.com"),
+ "should preserve mailto href"
+ );
+ }
+
+ #[test]
+ fn sanitize_passes_through_empty_input() {
+ let out = sanitize_creative_html(""); // boundary case: a creative with no markup at all
+ assert_eq!(out, "", "should return empty string unchanged");
+ }
+
+ #[test]
+ fn sanitize_removes_link_element() {
+ let html = r#""#;
+ let out = sanitize_creative_html(html);
+ assert!(!out.contains(" blocks can carry CSS expressions, @import, and url() payloads.
+ // Treated the same as : stripped entirely.
+ let html = r#"
ad
"#;
+ let out = sanitize_creative_html(html);
+ assert!(!out.contains(""#;
+ let out = sanitize_creative_html(html);
+ assert!(!out.contains("