@@ -2,6 +2,7 @@ use std::path::Path;
22use std:: fs;
33use tree_sitter:: { Parser , Query , QueryCursor , Node , Tree } ;
44use ignore:: WalkBuilder ;
5+ use rayon:: prelude:: * ;
56use crate :: { Result , GitTypeError } ;
67use super :: { CodeChunk , Language , ChunkType , ProgressReporter , NoOpProgressReporter } ;
78
@@ -22,31 +23,30 @@ impl Default for ExtractionOptions {
2223 }
2324}
2425
/// Extracts code chunks from source files.
///
/// The type carries no fields: parsers are not stored on the extractor but
/// are built on demand by `create_parser_for_language`.
pub struct CodeExtractor;
3027
3128impl CodeExtractor {
3229 pub fn new ( ) -> Result < Self > {
33- let mut rust_parser = Parser :: new ( ) ;
34- rust_parser. set_language ( tree_sitter_rust:: language ( ) )
35- . map_err ( |e| GitTypeError :: ExtractionFailed ( format ! ( "Failed to set Rust language: {}" , e) ) ) ?;
36-
37- let mut typescript_parser = Parser :: new ( ) ;
38- typescript_parser. set_language ( tree_sitter_typescript:: language_typescript ( ) )
39- . map_err ( |e| GitTypeError :: ExtractionFailed ( format ! ( "Failed to set TypeScript language: {}" , e) ) ) ?;
40-
41- let mut python_parser = Parser :: new ( ) ;
42- python_parser. set_language ( tree_sitter_python:: language ( ) )
43- . map_err ( |e| GitTypeError :: ExtractionFailed ( format ! ( "Failed to set Python language: {}" , e) ) ) ?;
44-
45- Ok ( Self {
46- rust_parser,
47- typescript_parser,
48- python_parser,
49- } )
30+ Ok ( Self )
31+ }
32+
33+ fn create_parser_for_language ( language : Language ) -> Result < Parser > {
34+ let mut parser = Parser :: new ( ) ;
35+ match language {
36+ Language :: Rust => {
37+ parser. set_language ( tree_sitter_rust:: language ( ) )
38+ . map_err ( |e| GitTypeError :: ExtractionFailed ( format ! ( "Failed to set Rust language: {}" , e) ) ) ?;
39+ }
40+ Language :: TypeScript => {
41+ parser. set_language ( tree_sitter_typescript:: language_typescript ( ) )
42+ . map_err ( |e| GitTypeError :: ExtractionFailed ( format ! ( "Failed to set TypeScript language: {}" , e) ) ) ?;
43+ }
44+ Language :: Python => {
45+ parser. set_language ( tree_sitter_python:: language ( ) )
46+ . map_err ( |e| GitTypeError :: ExtractionFailed ( format ! ( "Failed to set Python language: {}" , e) ) ) ?;
47+ }
48+ }
49+ Ok ( parser)
5050 }
5151
5252 pub fn extract_chunks ( & mut self , repo_path : & Path , options : ExtractionOptions ) -> Result < Vec < CodeChunk > > {
@@ -59,8 +59,6 @@ impl CodeExtractor {
5959 options : ExtractionOptions ,
6060 progress : & P ,
6161 ) -> Result < Vec < CodeChunk > > {
62- let mut chunks = Vec :: new ( ) ;
63-
6462 progress. set_phase ( "Scanning repository" . to_string ( ) ) ;
6563
6664 // Use ignore crate to respect .gitignore files
@@ -83,7 +81,7 @@ impl CodeExtractor {
8381
8482 if let Some ( extension) = path. extension ( ) . and_then ( |e| e. to_str ( ) ) {
8583 if let Some ( language) = Language :: from_extension ( extension) {
86- if self . should_process_file ( path, & options) {
84+ if Self :: should_process_file_static ( path, & options) {
8785 files_to_process. push ( ( path. to_path_buf ( ) , language) ) ;
8886 }
8987 }
@@ -93,13 +91,18 @@ impl CodeExtractor {
9391 let total_files = files_to_process. len ( ) ;
9492 progress. set_phase ( "Parsing AST" . to_string ( ) ) ;
9593
96- for ( i, ( path, language) ) in files_to_process. iter ( ) . enumerate ( ) {
97- progress. set_file_counts ( i + 1 , total_files) ;
98- if i % 3 == 0 { // Update spinner every 3 files to reduce flicker
99- progress. update_spinner ( ) ;
100- }
101-
102- let file_chunks = self . extract_from_file ( path, * language, & options) ?;
94+ // Process files in parallel using rayon without progress updates during parallel processing
95+ let all_chunks: Result < Vec < Vec < CodeChunk > > > = files_to_process
96+ . par_iter ( )
97+ . map ( |( path, language) | {
98+ // Extract chunks from file
99+ Self :: extract_from_file_static ( path, * language, & options)
100+ } )
101+ . collect ( ) ;
102+
103+ let all_chunks = all_chunks?;
104+ let mut chunks = Vec :: new ( ) ;
105+ for file_chunks in all_chunks {
103106 chunks. extend ( file_chunks) ;
104107 }
105108
@@ -111,6 +114,10 @@ impl CodeExtractor {
111114 }
112115
113116 fn should_process_file ( & self , path : & Path , options : & ExtractionOptions ) -> bool {
117+ Self :: should_process_file_static ( path, options)
118+ }
119+
120+ fn should_process_file_static ( path : & Path , options : & ExtractionOptions ) -> bool {
114121 let path_str = path. to_string_lossy ( ) ;
115122
116123 // Check exclude patterns first
@@ -135,17 +142,17 @@ impl CodeExtractor {
135142 }
136143
137144 pub fn extract_from_file ( & mut self , file_path : & Path , language : Language , options : & ExtractionOptions ) -> Result < Vec < CodeChunk > > {
145+ Self :: extract_from_file_static ( file_path, language, options)
146+ }
147+
148+ fn extract_from_file_static ( file_path : & Path , language : Language , options : & ExtractionOptions ) -> Result < Vec < CodeChunk > > {
138149 let content = fs:: read_to_string ( file_path) ?;
139- let parser = match language {
140- Language :: Rust => & mut self . rust_parser ,
141- Language :: TypeScript => & mut self . typescript_parser ,
142- Language :: Python => & mut self . python_parser ,
143- } ;
150+ let mut parser = Self :: create_parser_for_language ( language) ?;
144151
145152 let tree = parser. parse ( & content, None )
146153 . ok_or_else ( || GitTypeError :: ExtractionFailed ( format ! ( "Failed to parse file: {:?}" , file_path) ) ) ?;
147154
148- self . extract_chunks_from_tree ( & tree, & content, file_path, language, options)
155+ Self :: extract_chunks_from_tree_static ( & tree, & content, file_path, language, options)
149156 }
150157
151158 fn extract_chunks_from_tree (
@@ -155,11 +162,21 @@ impl CodeExtractor {
155162 file_path : & Path ,
156163 language : Language ,
157164 options : & ExtractionOptions ,
165+ ) -> Result < Vec < CodeChunk > > {
166+ Self :: extract_chunks_from_tree_static ( tree, source_code, file_path, language, options)
167+ }
168+
169+ fn extract_chunks_from_tree_static (
170+ tree : & Tree ,
171+ source_code : & str ,
172+ file_path : & Path ,
173+ language : Language ,
174+ options : & ExtractionOptions ,
158175 ) -> Result < Vec < CodeChunk > > {
159176 let mut chunks = Vec :: new ( ) ;
160177
161178 // Extract comment ranges for the entire file
162- let file_comment_ranges = self . extract_comment_ranges ( tree, source_code, language. clone ( ) ) ;
179+ let file_comment_ranges = Self :: extract_comment_ranges_static ( tree, source_code, language. clone ( ) ) ;
163180
164181 let query_str = match language {
165182 Language :: Rust => "
@@ -191,7 +208,7 @@ impl CodeExtractor {
191208 let node = capture. node ;
192209 let capture_name = & query. capture_names ( ) [ capture. index as usize ] ;
193210
194- if let Some ( chunk) = self . node_to_chunk ( node, source_code, file_path, language. clone ( ) , & capture_name, options, & file_comment_ranges) {
211+ if let Some ( chunk) = Self :: node_to_chunk_static ( node, source_code, file_path, language. clone ( ) , & capture_name, options, & file_comment_ranges) {
195212 chunks. push ( chunk) ;
196213 }
197214 }
@@ -209,6 +226,18 @@ impl CodeExtractor {
209226 capture_name : & str ,
210227 options : & ExtractionOptions ,
211228 file_comment_ranges : & [ ( usize , usize ) ] ,
229+ ) -> Option < CodeChunk > {
230+ Self :: node_to_chunk_static ( node, source_code, file_path, language, capture_name, options, file_comment_ranges)
231+ }
232+
233+ fn node_to_chunk_static (
234+ node : Node ,
235+ source_code : & str ,
236+ file_path : & Path ,
237+ language : Language ,
238+ capture_name : & str ,
239+ options : & ExtractionOptions ,
240+ file_comment_ranges : & [ ( usize , usize ) ] ,
212241 ) -> Option < CodeChunk > {
213242 let start_byte = node. start_byte ( ) ;
214243 let end_byte = node. end_byte ( ) ;
@@ -234,7 +263,7 @@ impl CodeExtractor {
234263 _ => return None ,
235264 } ;
236265
237- let name = self . extract_name ( node, source_code) . unwrap_or_else ( || "unknown" . to_string ( ) ) ;
266+ let name = Self :: extract_name_static ( node, source_code) . unwrap_or_else ( || "unknown" . to_string ( ) ) ;
238267
239268 // Filter comment ranges that are within this chunk and make them relative to chunk content
240269 let chunk_comment_ranges: Vec < ( usize , usize ) > = file_comment_ranges. iter ( )
@@ -249,7 +278,7 @@ impl CodeExtractor {
249278 . collect ( ) ;
250279
251280 // Normalize indentation based on AST node position
252- let ( normalized_content, normalized_comment_ranges) = self . normalize_indentation (
281+ let ( normalized_content, normalized_comment_ranges) = Self :: normalize_indentation_static (
253282 content,
254283 original_indentation,
255284 & chunk_comment_ranges
@@ -269,6 +298,10 @@ impl CodeExtractor {
269298 }
270299
271300 fn extract_name ( & self , node : Node , source_code : & str ) -> Option < String > {
301+ Self :: extract_name_static ( node, source_code)
302+ }
303+
304+ fn extract_name_static ( node : Node , source_code : & str ) -> Option < String > {
272305 // For variable_declarator, we need to get the name from the first child
273306 if node. kind ( ) == "variable_declarator" {
274307 let mut cursor = node. walk ( ) ;
@@ -301,6 +334,10 @@ impl CodeExtractor {
301334 }
302335
303336 fn normalize_indentation ( & self , content : & str , original_indentation : usize , comment_ranges : & [ ( usize , usize ) ] ) -> ( String , Vec < ( usize , usize ) > ) {
337+ Self :: normalize_indentation_static ( content, original_indentation, comment_ranges)
338+ }
339+
340+ fn normalize_indentation_static ( content : & str , original_indentation : usize , comment_ranges : & [ ( usize , usize ) ] ) -> ( String , Vec < ( usize , usize ) > ) {
304341 let lines: Vec < & str > = content. lines ( ) . collect ( ) ;
305342 if lines. is_empty ( ) {
306343 return ( content. to_string ( ) , comment_ranges. to_vec ( ) ) ;
@@ -396,6 +433,10 @@ impl CodeExtractor {
396433 }
397434
398435 fn extract_comment_ranges ( & self , tree : & Tree , source_code : & str , language : Language ) -> Vec < ( usize , usize ) > {
436+ Self :: extract_comment_ranges_static ( tree, source_code, language)
437+ }
438+
439+ fn extract_comment_ranges_static ( tree : & Tree , source_code : & str , language : Language ) -> Vec < ( usize , usize ) > {
399440 let mut comment_ranges = Vec :: new ( ) ;
400441
401442 let comment_query = match language {
0 commit comments