@@ -13,7 +13,9 @@ namespace DotNetNuke.Common.Utilities
1313 using DotNetNuke . Internal . SourceGenerators ;
1414 using DotNetNuke . Services . Upgrade ;
1515
16- /// <summary>HtmlUtils is a Utility class that provides Html Utility methods.</summary>
16+ using Ganss . Xss ;
17+
18+ /// <summary>HtmlUtils is a Utility class that provides HTML Utility methods.</summary>
1719 public partial class HtmlUtils
1820 {
1921 // Create Regular Expression objects
@@ -579,5 +581,81 @@ public static IHtmlString JavaScriptStringEncode(string value)
/// <inheritdoc cref="HttpUtility.JavaScriptStringEncode(string,bool)"/>
public static IHtmlString JavaScriptStringEncode(string value, bool addDoubleQuotes)
{
    // Encode first, then wrap the result in an HtmlString so it is returned as an IHtmlString.
    string encoded = HttpUtility.JavaScriptStringEncode(value, addDoubleQuotes);
    return new HtmlString(encoded);
}
584+
/// <summary>Sanitizes the given HTML, removing elements and attributes which could include JavaScript.</summary>
/// <param name="htmlInput">The HTML to sanitize.</param>
/// <returns>
/// The sanitized HTML, or an empty string when <paramref name="htmlInput"/> is <see langword="null"/> or empty.
/// </returns>
public static string CleanOutOfJavascript(string htmlInput)
{
    // Nothing to sanitize: mirror the null/empty handling of the other helpers in this class
    // instead of handing a null reference to the sanitizer.
    if (string.IsNullOrEmpty(htmlInput))
    {
        return string.Empty;
    }

    var sanitizer = new HtmlSanitizer();

    // We need to disallow all attributes that might contain JS
    foreach (var eventAttribute in new[] { "onclick", "onmouseover", "onmouseout", "onkeypress", "onkeydown", "onkeyup" })
    {
        sanitizer.AllowedAttributes.Remove(eventAttribute);
    }

    // We need to disallow tags like '<form action="javascript:submitForm()">'
    sanitizer.AllowedSchemes.Remove("javascript");

    // Tags like '<script>' are obviously not allowed
    sanitizer.AllowedTags.Remove("script");

    return sanitizer.Sanitize(htmlInput);
}
608+
/// <summary>Checks whether sanitizing the given <paramref name="htmlInput"/> changes its content, which is taken as a sign that it contains JavaScript.</summary>
/// <param name="htmlInput">The HTML to check.</param>
/// <returns>
/// <see langword="true"/> if <paramref name="htmlInput"/> appears to contain JavaScript, otherwise <see langword="false"/>.
/// A <see langword="null"/> or empty input yields <see langword="false"/>.
/// </returns>
/// <remarks>
/// This is a heuristic: the input and its sanitized form are compared case-insensitively after
/// whitespace and HTML syntax characters have been stripped from both.
/// </remarks>
public static bool ContainsJavaScript(string htmlInput)
{
    if (string.IsNullOrEmpty(htmlInput))
    {
        return false;
    }

    // Reduce both the raw and the sanitized markup to their bare content so that
    // formatting differences alone are not reported as removed script.
    var rawContent = StripHtmlSyntax(htmlInput);
    var sanitizedContent = StripHtmlSyntax(CleanOutOfJavascript(htmlInput));

    // Identical content means the sanitizer had nothing to remove.
    var unchanged = string.Equals(rawContent, sanitizedContent, StringComparison.OrdinalIgnoreCase);
    return !unchanged;
}
628+
/// <summary>Returns the given <paramref name="rawHtmlInput"/>, sanitized unless <paramref name="allowJavaScript"/> is <see langword="true"/>.</summary>
/// <param name="rawHtmlInput">The raw HTML input.</param>
/// <param name="allowJavaScript">Whether JavaScript may remain in the HTML.</param>
/// <returns>
/// An empty string for <see langword="null"/> or empty input; the unchanged input when JavaScript is allowed;
/// otherwise the sanitized HTML.
/// </returns>
public static string SanitizeHtmlIfNeeded(string rawHtmlInput, bool allowJavaScript)
{
    // Null or empty input: nothing to do
    if (string.IsNullOrEmpty(rawHtmlInput))
    {
        return string.Empty;
    }

    // Only sanitize when JavaScript is not allowed; otherwise pass the input through untouched
    return allowJavaScript
        ? rawHtmlInput
        : CleanOutOfJavascript(rawHtmlInput);
}
649+
/// <summary>Removes all whitespace and the HTML syntax characters <c>&lt; &gt; / " ' =</c> from the given text.</summary>
/// <param name="html">The text to strip; may be <see langword="null"/>.</param>
/// <returns>The stripped text, or an empty string when <paramref name="html"/> is <see langword="null"/> or empty.</returns>
private static string StripHtmlSyntax(string html)
{
    // Null and empty input both collapse to the empty string; everything else
    // loses its whitespace and markup punctuation.
    return string.IsNullOrEmpty(html)
        ? string.Empty
        : Regex.Replace(html, @"[\s<>/""'=]", string.Empty);
}
582660 }
583661}
0 commit comments