Skip to content

Commit 6839932

Browse files
unhappychoice and claude committed
feat: add Haskell language support
- Add Haskell parser with comprehensive query patterns
- Support function definitions, type signatures, data types, modules
- Add comprehensive integration tests with 6 test cases
- Enable pattern matching for Haskell-specific syntax constructs

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 411aed1 commit 6839932

File tree

3 files changed

+440
-0
lines changed

3 files changed

+440
-0
lines changed

src/extractor/parsers/haskell.rs

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
use super::LanguageExtractor;
2+
use crate::extractor::models::{ChunkType, Language};
3+
use crate::{GitTypeError, Result};
4+
use tree_sitter::{Node, Parser};
5+
6+
/// Stateless extractor that implements [`LanguageExtractor`] for Haskell
/// source files (extensions `hs` and `lhs`).
pub struct HaskellExtractor;
7+
8+
impl LanguageExtractor for HaskellExtractor {
9+
fn language(&self) -> Language {
10+
Language::Haskell
11+
}
12+
13+
fn file_extensions(&self) -> &[&str] {
14+
&["hs", "lhs"]
15+
}
16+
17+
fn tree_sitter_language(&self) -> tree_sitter::Language {
18+
tree_sitter_haskell::LANGUAGE.into()
19+
}
20+
21+
fn query_patterns(&self) -> &str {
22+
r#"
23+
; Function definitions with name extraction
24+
(function name: (variable) @function.name) @function.definition
25+
(function (variable) @function.name) @function.named
26+
27+
; Type signatures with name extraction
28+
(signature name: (variable) @signature.name) @signature.definition
29+
(signature (variable) @signature.name) @signature.named
30+
31+
; Module declarations
32+
(module) @module.definition
33+
34+
; Import declarations (if present)
35+
(import) @import.definition
36+
37+
; General declarations with names
38+
(declaration name: (variable) @declaration.name) @declaration.definition
39+
(decl name: (variable) @decl.name) @decl.definition
40+
41+
; Type elements
42+
(type name: (variable) @type.name) @type.definition
43+
44+
; Constructors
45+
(constructor) @constructor.definition
46+
47+
; Fallback patterns for completeness
48+
(function) @function.basic
49+
(signature) @signature.basic
50+
(declaration) @declaration.basic
51+
"#
52+
}
53+
54+
fn comment_query(&self) -> &str {
55+
"[(comment)] @comment"
56+
}
57+
58+
fn capture_name_to_chunk_type(&self, capture_name: &str) -> Option<ChunkType> {
59+
match capture_name {
60+
"function.definition" | "function.named" | "function.basic" => {
61+
Some(ChunkType::Function)
62+
}
63+
"signature.definition" | "signature.named" | "signature.basic" => {
64+
Some(ChunkType::Function)
65+
}
66+
"module.definition" => Some(ChunkType::Module),
67+
"import.definition" => Some(ChunkType::Module),
68+
"declaration.definition" | "declaration.basic" => Some(ChunkType::Function),
69+
"decl.definition" => Some(ChunkType::Function),
70+
"type.definition" => Some(ChunkType::TypeAlias),
71+
"constructor.definition" => Some(ChunkType::Struct),
72+
_ => None,
73+
}
74+
}
75+
76+
fn extract_name(&self, node: Node, source_code: &str, capture_name: &str) -> Option<String> {
77+
match capture_name {
78+
name if name.contains("function") => self.extract_function_name(node, source_code),
79+
name if name.contains("signature") => self.extract_signature_name(node, source_code),
80+
name if name.contains("declaration") || name.contains("decl") => {
81+
self.extract_declaration_name(node, source_code)
82+
}
83+
name if name.contains("type") => self.extract_type_name(node, source_code),
84+
name if name.contains("constructor") => {
85+
self.extract_constructor_name(node, source_code)
86+
}
87+
name if name.contains("module") => self.extract_module_name(node, source_code),
88+
name if name.contains("import") => self.extract_import_name(node, source_code),
89+
_ => self.extract_name_from_node(node, source_code),
90+
}
91+
}
92+
}
93+
94+
impl HaskellExtractor {
95+
fn extract_function_name(&self, node: Node, source_code: &str) -> Option<String> {
96+
// Look for variable node in function definition
97+
self.find_child_by_kind(node, source_code, "variable")
98+
}
99+
100+
fn extract_signature_name(&self, node: Node, source_code: &str) -> Option<String> {
101+
// Look for variable node in type signature
102+
self.find_child_by_kind(node, source_code, "variable")
103+
}
104+
105+
fn extract_type_name(&self, node: Node, source_code: &str) -> Option<String> {
106+
// Look for type node in data/newtype declaration
107+
self.find_child_by_kind(node, source_code, "type")
108+
}
109+
110+
fn extract_declaration_name(&self, node: Node, source_code: &str) -> Option<String> {
111+
// Look for variable node in general declarations
112+
self.find_child_by_kind(node, source_code, "variable")
113+
}
114+
115+
fn extract_constructor_name(&self, node: Node, source_code: &str) -> Option<String> {
116+
// Look for constructor node
117+
self.find_child_by_kind(node, source_code, "constructor")
118+
}
119+
120+
fn extract_module_name(&self, node: Node, source_code: &str) -> Option<String> {
121+
// Look for module_name node
122+
self.find_child_by_kind(node, source_code, "module_name")
123+
}
124+
125+
fn extract_import_name(&self, node: Node, source_code: &str) -> Option<String> {
126+
// Look for module_name node in import
127+
self.find_child_by_kind(node, source_code, "module_name")
128+
}
129+
130+
fn find_child_by_kind(&self, node: Node, source_code: &str, kind: &str) -> Option<String> {
131+
let mut cursor = node.walk();
132+
if cursor.goto_first_child() {
133+
loop {
134+
let child = cursor.node();
135+
if child.kind() == kind {
136+
return child
137+
.utf8_text(source_code.as_bytes())
138+
.ok()
139+
.map(|s| s.to_string());
140+
}
141+
// Recursively search in child nodes
142+
if let Some(name) = self.find_child_by_kind(child, source_code, kind) {
143+
return Some(name);
144+
}
145+
if !cursor.goto_next_sibling() {
146+
break;
147+
}
148+
}
149+
}
150+
None
151+
}
152+
153+
fn extract_name_from_node(&self, node: Node, source_code: &str) -> Option<String> {
154+
let mut cursor = node.walk();
155+
if cursor.goto_first_child() {
156+
loop {
157+
let child = cursor.node();
158+
match child.kind() {
159+
"variable" | "type" | "module_name" | "constructor" => {
160+
return child
161+
.utf8_text(source_code.as_bytes())
162+
.ok()
163+
.map(|s| s.to_string());
164+
}
165+
_ => {
166+
// Recursively search in child nodes
167+
if let Some(name) = self.extract_name_from_node(child, source_code) {
168+
return Some(name);
169+
}
170+
}
171+
}
172+
if !cursor.goto_next_sibling() {
173+
break;
174+
}
175+
}
176+
}
177+
None
178+
}
179+
180+
pub fn create_parser() -> Result<Parser> {
181+
let mut parser = Parser::new();
182+
parser
183+
.set_language(&tree_sitter_haskell::LANGUAGE.into())
184+
.map_err(|e| {
185+
GitTypeError::ExtractionFailed(format!("Failed to set Haskell language: {}", e))
186+
})?;
187+
Ok(parser)
188+
}
189+
}

0 commit comments

Comments
 (0)