File tree: 1 file changed, +8 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -105,18 +105,24 @@ def parse_text(
105
105
"""
106
106
try :
107
107
with path .open () as f :
108
- text : str = "" . join ( list (f ) ) if split_lines else f .read ()
108
+ text = list (f ) if split_lines else f .read ()
109
109
except UnicodeDecodeError :
110
110
with path .open (encoding = "utf-8" , errors = "ignore" ) as f :
111
111
text = f .read ()
112
112
113
113
if html :
114
+ if not isinstance (text , str ):
115
+ raise NotImplementedError (
116
+ "HTML parsing is not yet set up to work with split_lines."
117
+ )
114
118
text = html2text .html2text (text )
115
119
116
120
metadata = {
117
121
"parsing_libraries" : ["tiktoken (cl100k_base)" ] if use_tiktoken else [],
118
122
"paperqa_version" : pqa_version ,
119
- "total_parsed_text_length" : len (text ),
123
+ "total_parsed_text_length" : (
124
+ len (text ) if isinstance (text , str ) else sum (len (t ) for t in text )
125
+ ),
120
126
"parse_type" : "txt" if not html else "html" ,
121
127
}
122
128
if html :
You can’t perform that action at this time.
0 commit comments