Skip to content

Commit 5a77749

Browse files
authored
feat: preserve PascalCase tag names in HTML converter (#138)
1 parent d8d5c79 commit 5a77749

File tree

2 files changed

+140
-22
lines changed

2 files changed

+140
-22
lines changed

src/converter/emitter.rs

Lines changed: 69 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,62 @@
1+
use std::collections::HashMap;
2+
13
use html5ever::tendril::StrTendril;
24
use markup5ever_rcdom::{Handle, NodeData, RcDom};
35

46
use crate::common::is_void_element;
57

68
const INDENT_SIZE: usize = 2;
79

10+
/// Build a mapping from lowercased tag names to the casing used in the source.
///
/// html5ever lowercases all tags per HTML5 spec, but Vue/Angular use PascalCase
/// components that we want to preserve. Only names containing at least one ASCII
/// uppercase letter are recorded; when the same lowercased name occurs with several
/// casings, the first occurrence wins.
///
/// A tag name starts at an ASCII letter directly after `<` and runs until ASCII
/// whitespace, `/`, `>` or end of input, so names such as `Foo.Bar` or `Ns:Item`
/// are captured whole instead of being cut at the punctuation. Tags inside
/// `<!-- ... -->` comments are ignored because they never become elements.
///
/// This is a lightweight scan, not a parser: `<Name` sequences inside attribute
/// values or raw-text elements are still picked up.
fn build_case_map(original_html: &str) -> HashMap<String, String> {
    let mut map = HashMap::new();
    let bytes = original_html.as_bytes();
    let mut i = 0;

    while i < bytes.len() {
        if bytes[i] != b'<' {
            i += 1;
            continue;
        }

        // Markup inside a comment never produces an element, so skip the whole comment.
        if bytes[i..].starts_with(b"<!--") {
            // Search from the opening `--` so the short forms `<!-->` and `<!--->`
            // are recognised as already-closed comments.
            match original_html[i + 2..].find("-->") {
                Some(offset) => {
                    i += 2 + offset + "-->".len();
                    continue;
                }
                // Unterminated comment: the rest of the input is comment text.
                None => break,
            }
        }

        let start = i + 1;
        // Only `<` followed by an ASCII letter opens a start tag; this also skips
        // end tags (`</`), doctypes (`<!`) and stray `<` characters.
        if !matches!(bytes.get(start), Some(b) if b.is_ascii_alphabetic()) {
            i += 1;
            continue;
        }

        // The delimiters are all ASCII, so `end` always lands on a char boundary.
        let end = bytes[start..]
            .iter()
            .position(|&b| b.is_ascii_whitespace() || b == b'/' || b == b'>')
            .map_or(bytes.len(), |len| start + len);
        let tag = &original_html[start..end];

        // All-lowercase names already match what html5ever reports; nothing to remember.
        if tag.bytes().any(|b| b.is_ascii_uppercase()) {
            map.entry(tag.to_ascii_lowercase())
                .or_insert_with(|| tag.to_string());
        }
        i = end;
    }

    map
}
42+
43+
/// Look up the source casing recorded for a tag name.
///
/// Returns the value stored in `case_map` under `tag` when there is one;
/// otherwise `tag` is handed back unchanged.
fn resolve_tag_case<'a>(tag: &'a str, case_map: &'a HashMap<String, String>) -> &'a str {
    match case_map.get(tag) {
        Some(original) => original.as_str(),
        None => tag,
    }
}
47+
848
/// Emit HSML source from an html5ever DOM.
949
pub fn emit(dom: &RcDom, original_html: &str) -> String {
1050
let mut output = String::new();
1151
let lower_html = original_html.to_ascii_lowercase();
52+
let case_map = build_case_map(original_html);
1253

1354
// html5ever wraps content in <html><head><body> for document parsing.
1455
// We need to find the meaningful content nodes.
1556
let nodes = find_content_nodes(&dom.document, &lower_html);
1657

1758
for node in &nodes {
18-
emit_node(node, 0, &lower_html, &mut output);
59+
emit_node(node, 0, &lower_html, &case_map, &mut output);
1960
}
2061

2162
if !output.is_empty() && !output.ends_with('\n') {
@@ -142,10 +183,10 @@ fn has_mixed_content(node: &Handle) -> bool {
142183
}
143184

144185
/// Serialize a node's children back to raw HTML (for mixed content).
145-
fn serialize_inner_html(node: &Handle) -> String {
186+
fn serialize_inner_html(node: &Handle, case_map: &HashMap<String, String>) -> String {
146187
let mut html = String::new();
147188
for child in &effective_children(node) {
148-
serialize_node_to_html(child, &mut html, false);
189+
serialize_node_to_html(child, &mut html, false, case_map);
149190
}
150191
html
151192
}
@@ -206,7 +247,12 @@ fn escape_hsml_attr(key: &str, value: &str) -> String {
206247
}
207248
}
208249

209-
fn serialize_node_to_html(node: &Handle, output: &mut String, in_raw_text: bool) {
250+
fn serialize_node_to_html(
251+
node: &Handle,
252+
output: &mut String,
253+
in_raw_text: bool,
254+
case_map: &HashMap<String, String>,
255+
) {
210256
match &node.data {
211257
NodeData::Text { contents } => {
212258
if in_raw_text {
@@ -216,8 +262,9 @@ fn serialize_node_to_html(node: &Handle, output: &mut String, in_raw_text: bool)
216262
}
217263
}
218264
NodeData::Element { name, attrs, .. } => {
219-
let tag = name.local.as_ref();
220-
let is_raw = matches!(tag, "script" | "style");
265+
let lower_tag = name.local.as_ref();
266+
let tag = resolve_tag_case(lower_tag, case_map);
267+
let is_raw = matches!(lower_tag, "script" | "style");
221268
output.push('<');
222269
output.push_str(tag);
223270
for attr in attrs.borrow().iter() {
@@ -227,12 +274,12 @@ fn serialize_node_to_html(node: &Handle, output: &mut String, in_raw_text: bool)
227274
output.push_str(&escape_html_attr(&attr.value));
228275
output.push('"');
229276
}
230-
if is_void_element(tag) {
277+
if is_void_element(lower_tag) {
231278
output.push_str(" />");
232279
} else {
233280
output.push('>');
234281
for child in &effective_children(node) {
235-
serialize_node_to_html(child, output, is_raw);
282+
serialize_node_to_html(child, output, is_raw, case_map);
236283
}
237284
output.push_str("</");
238285
output.push_str(tag);
@@ -248,18 +295,26 @@ fn serialize_node_to_html(node: &Handle, output: &mut String, in_raw_text: bool)
248295
}
249296
}
250297

251-
fn emit_node(node: &Handle, depth: usize, lower_html: &str, output: &mut String) {
298+
fn emit_node(
299+
node: &Handle,
300+
depth: usize,
301+
lower_html: &str,
302+
case_map: &HashMap<String, String>,
303+
output: &mut String,
304+
) {
252305
match &node.data {
253306
NodeData::Doctype { name, .. } => {
254307
output.push_str(&format!("doctype {name}\n"));
255308
}
256309
NodeData::Element { name, attrs, .. } => {
310+
let tag = resolve_tag_case(&name.local, case_map);
257311
emit_element(
258312
node,
259-
&name.local,
313+
tag,
260314
&attrs.borrow(),
261315
depth,
262316
lower_html,
317+
case_map,
263318
output,
264319
);
265320
}
@@ -284,6 +339,7 @@ fn emit_element(
284339
attrs: &[html5ever::interface::Attribute],
285340
depth: usize,
286341
lower_html: &str,
342+
case_map: &HashMap<String, String>,
287343
output: &mut String,
288344
) {
289345
let indent = " ".repeat(depth * INDENT_SIZE);
@@ -412,7 +468,7 @@ fn emit_element(
412468

413469
let raw = if has_non_text {
414470
// Has nested elements/comments — serialize as raw HTML to preserve markup
415-
serialize_inner_html(node)
471+
serialize_inner_html(node, case_map)
416472
} else {
417473
// Text-only — collect raw text content
418474
significant_children
@@ -437,7 +493,7 @@ fn emit_element(
437493

438494
// Check for mixed content
439495
if has_mixed_content(node) {
440-
let inner = serialize_inner_html(node);
496+
let inner = serialize_inner_html(node, case_map);
441497
let trimmed = inner.trim();
442498
if trimmed.contains('\n') {
443499
// Multi-line mixed content → text block
@@ -489,7 +545,7 @@ fn emit_element(
489545
output.push_str(&format!("{indent}{line}\n"));
490546
for child in significant_children {
491547
if !is_synthesized_empty_element(child, lower_html) {
492-
emit_node(child, depth + 1, lower_html, output);
548+
emit_node(child, depth + 1, lower_html, case_map, output);
493549
}
494550
}
495551
}

src/converter/tests/convert.rs

Lines changed: 71 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,69 @@ fn it_should_handle_normal_id_and_class() {
133133
);
134134
}
135135

136+
// --- PascalCase tag preservation ---
137+
138+
#[test]
fn it_should_preserve_pascal_case_vue_components() {
    // A lone PascalCase component must come out with the casing it was written in.
    let html = r#"<PwaInstallPrompt class="xl:hidden"></PwaInstallPrompt>"#;
    let expected = "PwaInstallPrompt.xl:hidden\n";

    assert_eq!(conv(html), expected);
}
145+
146+
#[test]
147+
fn it_should_preserve_multiple_pascal_case_components() {
148+
assert_eq!(
149+
conv(
150+
r#"<div><NavUser v-if="show"></NavUser><NavUserSkeleton v-else></NavUserSkeleton></div>"#
151+
),
152+
r#"div
153+
NavUser(v-if="show")
154+
NavUserSkeleton(v-else)
155+
"#
156+
);
157+
}
158+
159+
#[test]
fn it_should_preserve_pascal_case_in_mixed_content() {
    // Inline markup that is re-serialized as raw HTML must keep the component casing too.
    let html = r#"<p>Hello <MyBadge>World</MyBadge> more</p>"#;
    let expected = "p Hello <MyBadge>World</MyBadge> more\n";

    assert_eq!(conv(html), expected);
}
166+
167+
// --- Kebab-case custom elements ---
168+
169+
#[test]
fn it_should_preserve_kebab_case_custom_elements() {
    // An all-lowercase custom element passes through untouched.
    let html = r#"<my-component class="active"></my-component>"#;
    let expected = "my-component.active\n";

    assert_eq!(conv(html), expected);
}
176+
177+
#[test]
fn it_should_preserve_kebab_case_web_components() {
    // A multi-segment kebab-case name is emitted exactly as written.
    let html = r#"<pwa-install-prompt class="xl:hidden"></pwa-install-prompt>"#;
    let expected = "pwa-install-prompt.xl:hidden\n";

    assert_eq!(conv(html), expected);
}
184+
185+
#[test]
186+
fn it_should_not_confuse_kebab_case_with_pascal_case() {
187+
// Both forms in the same document — each preserved as written
188+
assert_eq!(
189+
conv(
190+
r#"<div><PwaBadge class="lg:hidden"></PwaBadge><pwa-badge class="xl:hidden"></pwa-badge></div>"#
191+
),
192+
r#"div
193+
PwaBadge.lg:hidden
194+
pwa-badge.xl:hidden
195+
"#
196+
);
197+
}
198+
136199
// --- Vue/Angular syntax ---
137200

138201
#[test]
@@ -491,7 +554,6 @@ figure.md:flex.bg-slate-100.rounded-xl.p-8.md:p-0.dark:bg-slate-800/10
491554

492555
#[test]
493556
fn it_should_convert_complex_vue_html() {
494-
// TODO @Shinigami92 2026-04-06: PascalCase tags are currently not supported by html5ever
495557
let html = r#"<div ref="container" :class="containerClass">
496558
<div
497559
class="sticky top-0 z-20"
@@ -521,14 +583,14 @@ fn it_should_convert_complex_vue_html() {
521583
</div>
522584
<div class="px-3" flex="~ items-center shrink-0 gap-x-2">
523585
<slot name="actions"></slot>
524-
<pwa-badge class="xl:hidden"></pwa-badge>
525-
<nav-user v-if="isHydrated"></nav-user>
526-
<nav-user-skeleton v-else></nav-user-skeleton>
586+
<PwaBadge class="xl:hidden"></PwaBadge>
587+
<NavUser v-if="isHydrated"></NavUser>
588+
<NavUserSkeleton v-else></NavUserSkeleton>
527589
</div>
528590
</div>
529591
<slot name="header"><div hidden></div></slot>
530592
</div>
531-
<pwa-install-prompt class="xl:hidden"></pwa-install-prompt>
593+
<PwaInstallPrompt class="xl:hidden"></PwaInstallPrompt>
532594
<div
533595
class="m-auto"
534596
:class="isHydrated && wideLayout ? 'xl:w-full sm:max-w-600px' : 'sm:max-w-600px md:shrink-0'"
@@ -558,12 +620,12 @@ fn it_should_convert_complex_vue_html() {
558620
.sm:hidden.h-7.w-1px
559621
.px-3(flex="~ items-center shrink-0 gap-x-2")
560622
slot(name="actions")
561-
pwa-badge.xl:hidden
562-
nav-user(v-if="isHydrated")
563-
nav-user-skeleton(v-else)
623+
PwaBadge.xl:hidden
624+
NavUser(v-if="isHydrated")
625+
NavUserSkeleton(v-else)
564626
slot(name="header")
565627
div(hidden)
566-
pwa-install-prompt.xl:hidden
628+
PwaInstallPrompt.xl:hidden
567629
.m-auto(:class="isHydrated && wideLayout ? 'xl:w-full sm:max-w-600px' : 'sm:max-w-600px md:shrink-0'")
568630
.h-6(hidden, :class="{ 'xl:block': $route.name !== 'tag' && !$slots.header }")
569631
slot

0 commit comments

Comments
 (0)