1+ use std:: collections:: HashMap ;
2+
13use html5ever:: tendril:: StrTendril ;
24use markup5ever_rcdom:: { Handle , NodeData , RcDom } ;
35
46use crate :: common:: is_void_element;
57
68const INDENT_SIZE : usize = 2 ;
79
/// Build a mapping from lowercased tag names to the casing used in the source.
///
/// html5ever lowercases all tag names per the HTML5 spec, but Vue/Angular
/// templates use case-sensitive component names (`<MyButton>`, `<Form.Input>`)
/// that we want to preserve when emitting.
///
/// The scan is purely textual: every `<` immediately followed by an ASCII
/// letter is treated as the start of an opening tag, wherever it appears in
/// `original_html`. Closing tags (`</Name>`) are not inspected.
///
/// Returns a map keyed by the ASCII-lowercased tag name. Only names containing
/// at least one ASCII uppercase letter are recorded, and when the same
/// lowercased name occurs with several casings the first one seen wins.
fn build_case_map(original_html: &str) -> HashMap<String, String> {
    let mut map = HashMap::new();
    let bytes = original_html.as_bytes();
    let mut i = 0;

    while i < bytes.len() {
        let opens_tag =
            bytes[i] == b'<' && matches!(bytes.get(i + 1), Some(b) if b.is_ascii_alphabetic());
        if !opens_tag {
            i += 1;
            continue;
        }

        // The HTML tokenizer ends a tag name only at whitespace, `/` or `>`,
        // so characters such as `.` and `:` (and non-ASCII letters) belong to
        // the name. Stopping earlier would record a truncated key that never
        // matches the parsed name and could collide with a real element.
        let start = i + 1;
        let end = bytes[start..]
            .iter()
            .position(|&b| b.is_ascii_whitespace() || b == b'/' || b == b'>')
            .map_or(bytes.len(), |offset| start + offset);

        // `start` follows an ASCII `<` and `end` sits on an ASCII delimiter or
        // at the end of the input, so both are valid UTF-8 char boundaries.
        let tag = &original_html[start..end];

        // Only names with uppercase characters need restoring later.
        if tag.bytes().any(|b| b.is_ascii_uppercase()) {
            map.entry(tag.to_ascii_lowercase())
                .or_insert_with(|| tag.to_string());
        }
        i = end;
    }

    map
}
42+
/// Look up `tag` in `case_map` and return the recorded source casing.
///
/// When the map has no entry for `tag`, the tag is returned unchanged.
fn resolve_tag_case<'a>(tag: &'a str, case_map: &'a HashMap<String, String>) -> &'a str {
    match case_map.get(tag) {
        Some(original) => original.as_str(),
        None => tag,
    }
}
47+
848/// Emit HSML source from an html5ever DOM.
949pub fn emit ( dom : & RcDom , original_html : & str ) -> String {
1050 let mut output = String :: new ( ) ;
1151 let lower_html = original_html. to_ascii_lowercase ( ) ;
52+ let case_map = build_case_map ( original_html) ;
1253
1354 // html5ever wraps content in <html><head><body> for document parsing.
1455 // We need to find the meaningful content nodes.
1556 let nodes = find_content_nodes ( & dom. document , & lower_html) ;
1657
1758 for node in & nodes {
18- emit_node ( node, 0 , & lower_html, & mut output) ;
59+ emit_node ( node, 0 , & lower_html, & case_map , & mut output) ;
1960 }
2061
2162 if !output. is_empty ( ) && !output. ends_with ( '\n' ) {
@@ -142,10 +183,10 @@ fn has_mixed_content(node: &Handle) -> bool {
142183}
143184
144185/// Serialize a node's children back to raw HTML (for mixed content).
///
/// Each child yielded by `effective_children(node)` is appended to a fresh
/// `String` via `serialize_node_to_html`, with `in_raw_text` set to `false`.
/// `case_map` is forwarded unchanged so the serializer can restore the
/// original casing of element names. The accumulated string is returned.
145- fn serialize_inner_html ( node : & Handle ) -> String {
186+ fn serialize_inner_html ( node : & Handle , case_map : & HashMap < String , String > ) -> String {
146187 let mut html = String :: new ( ) ;
147188 for child in & effective_children ( node) {
148- serialize_node_to_html ( child, & mut html, false ) ;
189+ serialize_node_to_html ( child, & mut html, false , case_map ) ;
149190 }
150191 html
151192}
@@ -206,7 +247,12 @@ fn escape_hsml_attr(key: &str, value: &str) -> String {
206247 }
207248}
208249
209- fn serialize_node_to_html ( node : & Handle , output : & mut String , in_raw_text : bool ) {
250+ fn serialize_node_to_html (
251+ node : & Handle ,
252+ output : & mut String ,
253+ in_raw_text : bool ,
254+ case_map : & HashMap < String , String > ,
255+ ) {
210256 match & node. data {
211257 NodeData :: Text { contents } => {
212258 if in_raw_text {
@@ -216,8 +262,9 @@ fn serialize_node_to_html(node: &Handle, output: &mut String, in_raw_text: bool)
216262 }
217263 }
218264 NodeData :: Element { name, attrs, .. } => {
219- let tag = name. local . as_ref ( ) ;
220- let is_raw = matches ! ( tag, "script" | "style" ) ;
265+ let lower_tag = name. local . as_ref ( ) ;
266+ let tag = resolve_tag_case ( lower_tag, case_map) ;
267+ let is_raw = matches ! ( lower_tag, "script" | "style" ) ;
221268 output. push ( '<' ) ;
222269 output. push_str ( tag) ;
223270 for attr in attrs. borrow ( ) . iter ( ) {
@@ -227,12 +274,12 @@ fn serialize_node_to_html(node: &Handle, output: &mut String, in_raw_text: bool)
227274 output. push_str ( & escape_html_attr ( & attr. value ) ) ;
228275 output. push ( '"' ) ;
229276 }
230- if is_void_element ( tag ) {
277+ if is_void_element ( lower_tag ) {
231278 output. push_str ( " />" ) ;
232279 } else {
233280 output. push ( '>' ) ;
234281 for child in & effective_children ( node) {
235- serialize_node_to_html ( child, output, is_raw) ;
282+ serialize_node_to_html ( child, output, is_raw, case_map ) ;
236283 }
237284 output. push_str ( "</" ) ;
238285 output. push_str ( tag) ;
@@ -248,18 +295,26 @@ fn serialize_node_to_html(node: &Handle, output: &mut String, in_raw_text: bool)
248295 }
249296}
250297
251- fn emit_node ( node : & Handle , depth : usize , lower_html : & str , output : & mut String ) {
298+ fn emit_node (
299+ node : & Handle ,
300+ depth : usize ,
301+ lower_html : & str ,
302+ case_map : & HashMap < String , String > ,
303+ output : & mut String ,
304+ ) {
252305 match & node. data {
253306 NodeData :: Doctype { name, .. } => {
254307 output. push_str ( & format ! ( "doctype {name}\n " ) ) ;
255308 }
256309 NodeData :: Element { name, attrs, .. } => {
310+ let tag = resolve_tag_case ( & name. local , case_map) ;
257311 emit_element (
258312 node,
259- & name . local ,
313+ tag ,
260314 & attrs. borrow ( ) ,
261315 depth,
262316 lower_html,
317+ case_map,
263318 output,
264319 ) ;
265320 }
@@ -284,6 +339,7 @@ fn emit_element(
284339 attrs : & [ html5ever:: interface:: Attribute ] ,
285340 depth : usize ,
286341 lower_html : & str ,
342+ case_map : & HashMap < String , String > ,
287343 output : & mut String ,
288344) {
289345 let indent = " " . repeat ( depth * INDENT_SIZE ) ;
@@ -412,7 +468,7 @@ fn emit_element(
412468
413469 let raw = if has_non_text {
414470 // Has nested elements/comments — serialize as raw HTML to preserve markup
415- serialize_inner_html ( node)
471+ serialize_inner_html ( node, case_map )
416472 } else {
417473 // Text-only — collect raw text content
418474 significant_children
@@ -437,7 +493,7 @@ fn emit_element(
437493
438494 // Check for mixed content
439495 if has_mixed_content ( node) {
440- let inner = serialize_inner_html ( node) ;
496+ let inner = serialize_inner_html ( node, case_map ) ;
441497 let trimmed = inner. trim ( ) ;
442498 if trimmed. contains ( '\n' ) {
443499 // Multi-line mixed content → text block
@@ -489,7 +545,7 @@ fn emit_element(
489545 output. push_str ( & format ! ( "{indent}{line}\n " ) ) ;
490546 for child in significant_children {
491547 if !is_synthesized_empty_element ( child, lower_html) {
492- emit_node ( child, depth + 1 , lower_html, output) ;
548+ emit_node ( child, depth + 1 , lower_html, case_map , output) ;
493549 }
494550 }
495551}
0 commit comments