Skip to content

Commit f16240a

Browse files
authored
Fixed sonnet json formatting issue (#293)
* Fixed sonnet json formatting issue * PR comments - added notes and types
1 parent 6581202 commit f16240a

File tree

3 files changed

+30
-1
lines changed

3 files changed

+30
-1
lines changed

paperqa/utils.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,4 +184,15 @@ def llm_read_json(text: str) -> dict:
184184
text = "{" + text.split("{", 1)[-1]
185185
# split anything after the last }
186186
text = text.rsplit("}", 1)[0] + "}"
187+
188+
# escape new lines within strings
189+
def replace_newlines(match: re.Match) -> str:
190+
return match.group(0).replace("\n", "\\n")
191+
192+
# Match anything between double quotes
193+
# including escaped quotes and other escaped characters.
194+
# https://regex101.com/r/VFcDmB/1
195+
pattern = r'"(?:[^"\\]|\\.)*"'
196+
text = re.sub(pattern, replace_newlines, text)
197+
187198
return json.loads(text)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ name = "paper-qa"
4040
readme = "README.md"
4141
requires-python = ">=3.8"
4242
urls = {repository = "https://github.com/whitead/paper-qa"}
43-
version = "4.8.0"
43+
version = "4.8.1"
4444

4545
[tool.codespell]
4646
check-filenames = true

tests/test_paperqa.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import pickle
55
import tempfile
6+
import textwrap
67
from io import BytesIO
78
from pathlib import Path
89

@@ -457,6 +458,23 @@ def test_llm_read_json(example: str):
457458
assert llm_read_json(example) == {"example": "json"}
458459

459460

461+
def test_llm_read_json_newlines():
462+
"""Make sure that newlines in json are preserved and escaped."""
463+
example = textwrap.dedent(
464+
"""
465+
{
466+
"summary": "A line
467+
468+
Another line",
469+
"relevance_score": 7
470+
}"""
471+
)
472+
assert llm_read_json(example) == {
473+
"summary": "A line\n\nAnother line",
474+
"relevance_score": 7,
475+
}
476+
477+
460478
@pytest.mark.asyncio()
461479
async def test_chain_completion():
462480
client = AsyncOpenAI()

0 commit comments

Comments
 (0)