unhappychoice
diff --git a/‎examples/debug_comment.rs‎
Lines changed: 69 additions & 0 deletions b/‎examples/debug_comment.rs‎
Lines changed: 69 additions & 0 deletions
diff --git a/‎src/extractor/challenge_converter.rs‎
Lines changed: 1 addition & 1 deletion b/‎src/extractor/challenge_converter.rs‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/extractor/core/extractor.rs‎
Lines changed: 65 additions & 25 deletions b/‎src/extractor/core/extractor.rs‎
Lines changed: 65 additions & 25 deletions
diff --git a/‎src/game/stage_renderer.rs‎
Lines changed: 8 additions & 2 deletions b/‎src/game/stage_renderer.rs‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎src/game/text_processor.rs‎
Lines changed: 1 addition & 2 deletions b/‎src/game/text_processor.rs‎
Lines changed: 1 addition & 2 deletions
@@ -0,0 +1,69 @@
+use gittype::game::typing_core::{ProcessingOptions, TypingCore};
+
+fn main() { // Debug aid: prints byte- vs char-based comment ranges for a sample containing multi-byte symbols
+    let code = r#"fn test() {
+    // Path symbols: ../.. and ./path and ~/home  
+    // Unicode arrows: → ← ↑ ↓ and ↵ symbol
+    // Mixed symbols: ../../config.json → ~/.config/
+    let x = 42;
+}"#;
+
+    let comment_ranges = vec![
+        (
+            code.find("// Path symbols").unwrap(),
+            code.find("~/home").unwrap() + "~/home".len(),
+        ),
+        (
+            code.find("// Unicode arrows").unwrap(),
+            code.find("↵ symbol").unwrap() + "↵ symbol".len(),
+        ),
+        (
+            code.find("// Mixed symbols").unwrap(),
+            code.find("~/.config/").unwrap() + "~/.config/".len(),
+        ),
+    ];
+
+    println!(
+        "code.len bytes={}, chars={}",
+        code.len(),
+        code.chars().count()
+    );
+    println!("comment_ranges (bytes): {:?}", comment_ranges);
+
+    let typing_core = TypingCore::new(code, &comment_ranges, ProcessingOptions::default());
+    let display = typing_core.text_to_display().to_string();
+    println!(
+        "display bytes={}, chars={}",
+        display.len(),
+        display.chars().count()
+    );
+    println!("display: {}", display);
+    let ranges = typing_core.display_comment_ranges();
+    println!("display_ranges: {:?}", ranges);
+    for (i, (s, e)) in ranges.iter().enumerate() {
+        let frag = &display[*s..*e]; // NOTE(review): byte-indexed slice; panics if a range is not on a char boundary — confirm these are byte offsets
+        println!("[{}] {:?}", i, frag);
+    }
+
+    // Convert the original byte-offset ranges to char-index ranges by counting the chars before each offset
+    let to_char = |b: usize| code[..b.min(code.len())].chars().count();
+    let converted: Vec<(usize, usize)> = comment_ranges
+        .iter()
+        .map(|&(s, e)| (to_char(s), to_char(e)))
+        .collect();
+    println!("converted char ranges: {:?}", converted);
+    let code_chars: Vec<char> = code.chars().collect();
+    for (i, (cs, ce)) in converted.iter().copied().enumerate() {
+        let text: String = code_chars[cs..ce].iter().collect();
+        println!("orig[{}]: {:?} (chars {})", i, text, text.chars().count());
+    }
+
+    // Print each line's char-index span (start..end, where end excludes the line terminator)
+    let mut acc = 0usize;
+    for (i, line) in code.lines().enumerate() {
+        let start = acc;
+        let end = start + line.chars().count();
+        println!("line {} chars {}..{} => {:?}", i + 1, start, end, line);
+        acc = end + 1; // skip the '\n' that lines() strips (one char; assumes LF line endings)
+    }
+}
@@ -190,7 +190,7 @@ impl ChallengeConverter {
                 // Check if truncated content meets minimum requirements
                 let adjusted_comment_ranges = self.adjust_comment_ranges_for_truncation(
                     &chunk.comment_ranges,
-                    truncated_content.len(),
+                    truncated_content.chars().count(),
                 );
                 let truncated_code_chars =
                     self.count_code_characters(truncated_content, &adjusted_comment_ranges);
 
@@ -70,11 +70,14 @@ impl CommonExtractor {
         while let Some(m) = matches.next() {
             for capture in m.captures {
                 let node = capture.node;
-                let start = node.start_byte();
-                let end = node.end_byte();
+                let start_byte = node.start_byte();
+                let end_byte = node.end_byte();
 
                 if Self::is_valid_comment_node(node, language) {
-                    comment_ranges.push((start, end));
+                    // Convert byte positions to character positions
+                    let start_char = Self::byte_to_char_position(source_code, start_byte);
+                    let end_char = Self::byte_to_char_position(source_code, end_byte);
+                    comment_ranges.push((start_char, end_char));
                 }
             }
         }
@@ -83,6 +86,13 @@ impl CommonExtractor {
         Ok(comment_ranges)
     }
 
+    /// Convert a byte offset in `source_code` to a char count; offsets past the end clamp to the total char count
+    fn byte_to_char_position(source_code: &str, byte_pos: usize) -> usize {
+        source_code[..byte_pos.min(source_code.len())] // slicing panics if the offset is not on a char boundary
+            .chars()
+            .count()
+    }
+
     fn is_valid_comment_node(node: Node, language: &str) -> bool {
         let node_kind = node.kind();
         match language {
@@ -111,22 +121,27 @@ impl CommonExtractor {
         file_path: &Path,
         language: &str,
         capture_name: &str,
-        file_comment_ranges: &[(usize, usize)],
+        file_comment_ranges: &[(usize, usize)], // Already in character positions
     ) -> Option<CodeChunk> {
         let start_byte = node.start_byte();
         let end_byte = node.end_byte();
         let content = &source_code[start_byte..end_byte];
 
+        // Convert byte positions to character positions to match file_comment_ranges
+        let start_char = Self::byte_to_char_position(source_code, start_byte);
+        let end_char = Self::byte_to_char_position(source_code, end_byte);
+
         let start_line = node.start_position().row + 1;
         let end_line = node.end_position().row + 1;
-        let original_indentation = node.start_position().column;
+        let original_indentation_bytes = node.start_position().column;
 
         // Extract actual indentation characters from source
-        let original_indent_chars = if original_indentation > 0 {
-            Self::extract_line_indent_chars(
+        // Note: original_indentation_bytes is a byte column from tree-sitter, but we need char units
+        let original_indent_chars = if original_indentation_bytes > 0 {
+            Self::extract_line_indent_chars_corrected(
                 source_code,
                 node.start_position().row,
-                original_indentation,
+                original_indentation_bytes,
             )
         } else {
             String::new()
@@ -142,22 +157,42 @@ impl CommonExtractor {
             .or_else(|| Self::extract_name(node, source_code))
             .unwrap_or_else(|| "unknown".to_string());
 
+        let normalized_content =
+            Self::normalize_first_line_indentation(content, &original_indent_chars);
+
+        // Simple position calculation:
+        // code_start_pos = start_char (char offset where the tree-sitter chunk starts)
+        // chunk_start_pos = original_indentation_bytes (node.start_position().column)
+        // comment_start_pos = comment_raw_pos_start - code_start_pos
+
+        // Adjust comment ranges to be relative to the normalized content.
+        // Note:
+        // - file_comment_ranges are character-based positions for the whole file
+        // - We first convert them to chunk-relative character positions
+        // - Then we add the first-line indentation characters we injected at the very
+        //   beginning of the normalized content, so display-time positions match
+        let indent_offset_chars = original_indent_chars.chars().count();
+
         let chunk_comment_ranges: Vec<(usize, usize)> = file_comment_ranges
             .iter()
-            .filter_map(|&(comment_start, comment_end)| {
-                if comment_start >= start_byte && comment_end <= end_byte {
-                    Some((comment_start - start_byte, comment_end - start_byte))
+            .filter_map(|&(comment_raw_pos_start, comment_raw_pos_end)| {
+                // Check if comment is within this chunk's boundaries
+                if comment_raw_pos_start >= start_char && comment_raw_pos_end <= end_char {
+                    // Convert to chunk-relative positions
+                    let comment_start_pos = comment_raw_pos_start - start_char;
+                    let comment_end_pos = comment_raw_pos_end - start_char;
+
+                    // Account for added indentation at the very start of normalized content
+                    let adjusted_start = comment_start_pos + indent_offset_chars;
+                    let adjusted_end = comment_end_pos + indent_offset_chars;
+
+                    Some((adjusted_start, adjusted_end))
                 } else {
                     None
                 }
             })
             .collect();
 
-        let normalized_content = Self::normalize_first_line_indentation(
-            content,
-            &original_indent_chars,
-        );
-
         Some(CodeChunk {
             content: normalized_content,
             file_path: file_path.to_path_buf(),
@@ -167,7 +202,8 @@ impl CommonExtractor {
             chunk_type,
             name,
             comment_ranges: chunk_comment_ranges,
-            original_indentation,
+            // Store indentation as character count to keep extractor outputs character-based
+            original_indentation: indent_offset_chars,
         })
     }
 
@@ -207,17 +243,14 @@ impl CommonExtractor {
         None
     }
 
-    fn normalize_first_line_indentation(
-        content: &str,
-        original_indent_chars: &str,
-    ) -> String {
+    fn normalize_first_line_indentation(content: &str, original_indent_chars: &str) -> String {
         let lines: Vec<&str> = content.lines().collect();
         if lines.is_empty() {
             return content.to_string();
         }
 
         let mut result_lines = Vec::new();
-        
+
         for (line_idx, line) in lines.iter().enumerate() {
             if line_idx == 0 {
                 // First line: add original indentation characters from source
@@ -231,15 +264,22 @@ impl CommonExtractor {
         result_lines.join("\n")
     }
 
-    fn extract_line_indent_chars(
+    pub fn extract_line_indent_chars_corrected(
         source_code: &str,
         line_row: usize,
-        indent_length: usize,
+        indent_byte_length: usize,
     ) -> String {
         let lines: Vec<&str> = source_code.lines().collect();
         if line_row < lines.len() {
             let line = lines[line_row];
-            line.chars().take(indent_length).collect()
+            // Convert byte position to character position first
+            if indent_byte_length <= line.len() {
+                let indent_char_count = line[..indent_byte_length].chars().count();
+                line.chars().take(indent_char_count).collect()
+            } else {
+                // If byte length exceeds line length, take all characters
+                line.to_string()
+            }
         } else {
             String::new()
         }
 
@@ -242,6 +242,8 @@ impl StageRenderer {
             lines.push(Line::from(line_spans));
         }
 
+        let mut byte_position = 0; // Track byte position as we iterate
+
         for (i, &ch) in self.chars.iter().enumerate() {
             // Add line number at the start of each line
             if line_start {
@@ -266,14 +268,15 @@ impl StageRenderer {
                 current_line_width = 0;
                 line_number += 1;
                 line_start = true;
+                byte_position += ch.len_utf8(); // Update byte position
                 continue;
             }
 
-            // Check if this character is in a comment
+            // Check if this character is in a comment using byte position
             let is_in_comment = params
                 .display_comment_ranges
                 .iter()
-                .any(|&(start, end)| i >= start && i < end);
+                .any(|&(start, end)| byte_position >= start && byte_position < end);
 
             // Determine character style
             let style = if is_in_comment {
@@ -319,6 +322,9 @@ impl StageRenderer {
 
             current_line_spans.push(Span::styled(display_char, style));
             current_line_width += char_width;
+
+            // Update byte position for next iteration
+            byte_position += ch.len_utf8();
         }
 
         if !current_line_spans.is_empty() {
 
@@ -91,11 +91,10 @@ impl TextProcessor {
                 };
 
                 let mapped_end = if end <= position_mapping.len() {
-                    // Find the last non-None position before end
                     (0..end)
                         .rev()
                         .find_map(|i| position_mapping.get(i).and_then(|&pos| pos))
-                        .map(|pos| pos + 1) // +1 because end is exclusive
+                        .map(|pos| pos + 1)
                 } else {
                     None
                 };