From 6066fccd416c64cfab6030236352dae981b113ff Mon Sep 17 00:00:00 2001 From: Gabriel de Tassigny Date: Thu, 9 Oct 2025 13:41:28 +0900 Subject: [PATCH] Strip inline SVG data URIs prior to content parsing Inline SVG data URIs can cause issues for our URL parsing when they are too long. Since we're not interested in these URIs, it's simpler to strip them out before parsing. --- php/class-utils.php | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/php/class-utils.php b/php/class-utils.php index 6fa569ae..7d3efff4 100644 --- a/php/class-utils.php +++ b/php/class-utils.php @@ -620,6 +620,9 @@ public static function is_admin() { * @return array */ public static function extract_urls( $content ) { + // Remove inline SVG data URIs, as they can cause parsing issues when extracting URLs. + $content = self::strip_inline_svg_data_uris( $content ); + preg_match_all( "#([\"']?)(" . '(?:[\w-]+:)?//?' @@ -642,6 +645,25 @@ public static function extract_urls( $content ) { return array_values( $post_links ); } + /** + * Strip inline SVG data URIs from content. + * + * @param string $content The content to process. + * + * @return string The content with SVG data URIs removed. + */ + public static function strip_inline_svg_data_uris( $content ) { + // Pattern to match the data URI structure: data:image/svg+xml;base64,. + $svg_data_uri_pattern = '/data:image\/svg\+xml;base64,[a-zA-Z0-9\/\+\=]+/i'; + + // Remove all occurrences of SVG data URIs from the content. + $cleaned_content = preg_replace( $svg_data_uri_pattern, '', $content ); + + // In case an error occurred, we return the original content to avoid data loss. + return is_null( $cleaned_content ) ? $content : $cleaned_content; + } + + /** * Is saving metadata. *