Implement support for Batch APIs to gather evidence #687

Closed
wants to merge 32 commits into from
Changes from 9 commits
32 commits
2385dce
Implements OpenAIBatchLLMModel
maykcaldas Nov 14, 2024
8a21055
Incorporates OpenAIBatchLLMModel to get_evidence
maykcaldas Nov 14, 2024
5f59681
Merge branch 'main' into batch_api
maykcaldas Nov 15, 2024
6f7bbb5
Merge branch 'main' into batch_api
maykcaldas Nov 16, 2024
e8dc0d0
Started anthropic batch api support implementation
maykcaldas Nov 15, 2024
899de43
Removed the skip_system argument from the new classes and tests to ma…
maykcaldas Nov 15, 2024
16c3988
Switched to async OpenAI client
maykcaldas Nov 16, 2024
d10a268
Added logging to the batch processing
maykcaldas Nov 16, 2024
0fe9aa1
Created mock server to test openAI batch API
maykcaldas Nov 18, 2024
a9ad540
Implemented batch support to Anthropic
maykcaldas Nov 18, 2024
9a0a6c4
Merge branch 'main' into batch_api
maykcaldas Nov 18, 2024
723650d
Updated uv.lock to include imports for the batch API
maykcaldas Nov 18, 2024
660bfa0
Implements tests with a mocked server for anthropic
maykcaldas Nov 18, 2024
977a025
Added type hints to satisfy the pre-commit
maykcaldas Nov 19, 2024
ee351f2
Merge branch 'main' into batch_api
maykcaldas Nov 19, 2024
293658a
Updates uv on github actions to include extra requirements
maykcaldas Nov 19, 2024
1ad1c7c
Removed the --all-extras flag from uv in github workflow
maykcaldas Nov 19, 2024
af32005
Refactored OpenAiBatchStatus and AnthropicBatchStatus to make the cod…
maykcaldas Nov 19, 2024
63e4b39
[pre-commit.ci lite] apply automatic fixes
pre-commit-ci-lite[bot] Nov 19, 2024
f61e629
Merge branch 'main' into batch_api
maykcaldas Nov 19, 2024
d7dbd72
Cleaned unneeded comments
maykcaldas Nov 19, 2024
7c37f6d
Updated the way the system message is passed to anthropic
maykcaldas Nov 19, 2024
de18907
changed how the file is passed to openai
maykcaldas Nov 20, 2024
3e72bd4
[pre-commit.ci lite] apply automatic fixes
pre-commit-ci-lite[bot] Nov 20, 2024
7c7f4b8
Avoided writing to a file when sending the batch to openAi
maykcaldas Nov 20, 2024
6c8f186
Skipped writing a file. Instead, the content is directly passed to th…
maykcaldas Nov 20, 2024
0e43a7c
Merge branch 'main' into batch_api
maykcaldas Nov 20, 2024
17c26eb
Fixed lint error
maykcaldas Nov 20, 2024
c258306
Updated the batch time limit settings name
maykcaldas Nov 20, 2024
4b8e1c3
Removed type hints from docstrings in gather_with_batch
maykcaldas Nov 20, 2024
8b5c1fa
Added exception in map_fxn_summary to treat multiple responses
maykcaldas Nov 20, 2024
ab40b54
Added a description explaining the llm_type attribute
maykcaldas Nov 20, 2024
4 changes: 4 additions & 0 deletions paperqa/__init__.py
@@ -15,6 +15,8 @@
HybridEmbeddingModel,
LiteLLMEmbeddingModel,
LiteLLMModel,
OpenAIBatchLLMModel,
AnthropicBatchLLMModel,
LLMModel,
LLMResult,
NumpyVectorStore,
@@ -38,6 +40,8 @@
"LLMResult",
"LiteLLMEmbeddingModel",
"LiteLLMModel",
"OpenAIBatchLLMModel",
"AnthropicBatchLLMModel",
"NumpyVectorStore",
"PQASession",
"QueryRequest",
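With this change the two batch model classes are re-exported from the package root, so downstream code can import them alongside the existing models. A minimal sketch of the import (constructor arguments are not shown in this diff, so no configuration is assumed here):

```python
# Both classes become importable from the package root with this change.
from paperqa import AnthropicBatchLLMModel, OpenAIBatchLLMModel
```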
47 changes: 47 additions & 0 deletions paperqa/core.py
@@ -115,3 +115,50 @@ async def map_fxn_summary(
),
llm_result,
)


async def gather_with_batch(
    matches: list[Text],
    question: str,
    prompt_runner: PromptRunner | None,
    extra_prompt_data: dict[str, str] | None = None,
    parser: Callable[[str], dict[str, Any]] | None = None,
    callbacks: list[Callable[[str], None]] | None = None,
) -> list[tuple[Context, LLMResult]]:
    """Gather summary results for a batch of texts in a single batched LLM call."""
    data = [
        {
            "question": question,
            "citation": m.name + ": " + m.doc.formatted_citation,
            "text": m.text,
        }
        | (extra_prompt_data or {})
        for m in matches
    ]

    # NOTE: unlike map_fxn_summary, this path requires a prompt_runner.
    llm_results = await prompt_runner(
        data,
        callbacks,
    )

    results_data = []
    scores = []
    for r in llm_results:
        try:
            # Pop bookkeeping keys from the parsed dict, not from the LLMResult.
            result_data = parser(r.text) if parser else {}
            scores.append(result_data.pop("relevance_score"))
            # just in case question was present
            result_data.pop("question", None)
            results_data.append(result_data)
        except Exception:
            results_data.append({})
            scores.append(extract_score(r.text))

    return [
        (
            Context(
                context=strip_citations(llm_result.text),
                text=m,
                model_extra={},
                score=score,
                **r,
            ),
            llm_result,
        )
        for r, m, llm_result, score in zip(results_data, matches, llm_results, scores)
    ]
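For context, `gather_with_batch` expects each parsed response to carry a `relevance_score`; whatever else the parser leaves in the dict is splatted into the resulting `Context`. A minimal sketch of that contract with a hypothetical JSON payload (only `relevance_score` and `question` are fixed by the code above; the `summary` field name is an assumption):

```python
import json

# Hypothetical raw completion for one batched summary request.
raw = '{"summary": "Reports a 12% improvement on benchmark X.", "relevance_score": 8}'

parsed = json.loads(raw)  # stand-in for a parser such as llm_parse_json
score = parsed.pop("relevance_score")  # consumed as Context.score
parsed.pop("question", None)  # dropped if the model echoed the question back
# Remaining keys (here just "summary") are forwarded via Context(**parsed).
print(score, parsed)
```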
55 changes: 37 additions & 18 deletions paperqa/docs.py
@@ -22,7 +22,11 @@
)

from paperqa.clients import DEFAULT_CLIENTS, DocMetadataClient
from paperqa.core import llm_parse_json, map_fxn_summary
from paperqa.core import (
    gather_with_batch,
    llm_parse_json,
    map_fxn_summary,
)
from paperqa.llms import (
EmbeddingModel,
LLMModel,
@@ -40,6 +44,7 @@
LLMResult,
PQASession,
Text,
Context,
set_llm_session_ids,
)
from paperqa.utils import (
@@ -50,6 +55,8 @@
maybe_is_text,
md5sum,
name_in_text,
extract_score,
strip_citations
)

logger = logging.getLogger(__name__)
@@ -600,23 +607,35 @@ async def aget_evidence(
)

with set_llm_session_ids(session.id):
results = await gather_with_concurrency(
answer_config.max_concurrent_requests,
[
map_fxn_summary(
text=m,
question=session.question,
prompt_runner=prompt_runner,
extra_prompt_data={
"summary_length": answer_config.evidence_summary_length,
"citation": f"{m.name}: {m.doc.formatted_citation}",
},
parser=llm_parse_json if prompt_config.use_json else None,
callbacks=callbacks,
)
for m in matches
],
)
if evidence_settings.use_batch_in_summary:
    results = await gather_with_batch(
        matches=matches,
        question=session.question,
        prompt_runner=prompt_runner,
        extra_prompt_data={
            "summary_length": answer_config.evidence_summary_length,
        },
        parser=llm_parse_json if prompt_config.use_json else None,
        callbacks=callbacks,
    )
else:
    results = await gather_with_concurrency(
        answer_config.max_concurrent_requests,
        [
            map_fxn_summary(
                text=m,
                question=session.question,
                prompt_runner=prompt_runner,
                extra_prompt_data={
                    "summary_length": answer_config.evidence_summary_length,
                    "citation": f"{m.name}: {m.doc.formatted_citation}",
                },
                parser=llm_parse_json if prompt_config.use_json else None,
                callbacks=callbacks,
            )
            for m in matches
        ],
    )

for _, llm_result in results:
session.add_tokens(llm_result)
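With this branch in place, `aget_evidence` routes summarization through the batch path whenever the batch flag is enabled in settings. A minimal usage sketch, assuming the flag is exposed on `Settings` as `use_batch_in_summary` (the diff only shows it being read as `evidence_settings.use_batch_in_summary`, so the exact settings path is an assumption):

```python
import asyncio

from paperqa import Docs, Settings


async def main() -> None:
    # Assumption: the batch flag is a top-level Settings field; its final
    # location in the Settings hierarchy may differ in this PR.
    settings = Settings(use_batch_in_summary=True)

    docs = Docs()
    await docs.aadd("paper.pdf")  # add sources as usual

    session = await docs.aget_evidence(
        "What does the paper conclude?", settings=settings
    )
    for context in session.contexts:
        print(context.score, context.context)


asyncio.run(main())
```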