Refactor to breakout config from rest of code #289

Merged: 40 commits, merged Sep 8, 2024
Changes from 37 commits

Commits
c9124b2
Extracted retrieve method
whitead Jun 20, 2024
b7574d0
Need to decide how to DRY this
whitead Jun 21, 2024
48efa06
Merge branch 'main' into issue-283
whitead Jun 25, 2024
b5a5b81
Merge branch 'september-2024-release' into issue-283
whitead Aug 29, 2024
a32fe4f
First draft of mapping
whitead Aug 30, 2024
6343cfd
Switched to new map function in gather evidence
whitead Aug 30, 2024
3ff2c4a
Reran pre-commit
whitead Aug 30, 2024
daecc4b
Merge branch 'september-2024-release' into issue-283
whitead Aug 30, 2024
02ba597
Stashing progress
whitead Aug 31, 2024
c3bd35c
Stashing progress
whitead Sep 1, 2024
d3706f4
Finished refactor
whitead Sep 3, 2024
d0fbdf5
Fixed ruff errors
whitead Sep 3, 2024
ad3a2da
Fixed all type hinting
whitead Sep 3, 2024
5aeb270
Made it possible to load named configs
whitead Sep 3, 2024
2038c37
Making progress on tests
whitead Sep 3, 2024
c2fe115
halfway through tests
whitead Sep 3, 2024
bc01d59
Added back all unit tests
whitead Sep 4, 2024
cf9f670
Fixed linting errors
whitead Sep 4, 2024
07d3cf8
Reenable CI
whitead Sep 4, 2024
20dd67e
Got indexes working again
whitead Sep 4, 2024
b183e2c
Stashing progress on agent rewrite
whitead Sep 5, 2024
3eb8e90
Agent tests finally pass
whitead Sep 5, 2024
c0783d3
Finished agent tests
whitead Sep 5, 2024
b2a9c8b
More work on tests
whitead Sep 5, 2024
e69ce1e
Merge branch september-2024-release into issue-283
mskarlin Sep 5, 2024
b9b66c9
removed unused imports and remove python label from docstring to avoi…
mskarlin Sep 5, 2024
ca66124
Rewrote CLI to use settings objects
whitead Sep 6, 2024
ac7833f
Stashing progress
whitead Sep 6, 2024
33a0d79
Moving to `uv` for installation/CI, parallel `pre-commit` in CI (#316)
jamesbraza Sep 6, 2024
b68fd91
Fixing `pybtex` import by requiring `setuptools` (#318)
jamesbraza Sep 6, 2024
0ed9c69
Fixing test installation in CI by specifying missing dependencies (#319)
jamesbraza Sep 6, 2024
9ec067e
Got CLI to work nicely
whitead Sep 6, 2024
2dcb659
Merge branch 'issue-283' of github.com:whitead/paper-qa into issue-283
whitead Sep 6, 2024
b8998c6
LiteLLM integration (#315)
mskarlin Sep 7, 2024
5ed57b4
Can now save and load settings
whitead Sep 7, 2024
e2aa2d0
Merge branch 'issue-283' of github.com:whitead/paper-qa into issue-283
whitead Sep 7, 2024
51499ad
Removed old CLI tests
whitead Sep 7, 2024
c3c314e
Fixed logging and tests
whitead Sep 8, 2024
2ac3f1e
Addressed some PR comments
whitead Sep 8, 2024
bc91c03
More PR Comments
whitead Sep 8, 2024
18 changes: 6 additions & 12 deletions .github/workflows/build.yml
@@ -2,25 +2,19 @@ name: publish

 on:
   release:
-    types:
-      - created
+    types: [created]
   workflow_dispatch:

 jobs:
   publish:
     runs-on: ubuntu-latest
-
     steps:
       - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: 3.11
-          cache: pip
-      - run: pip install .[agents,google,dev,llms]
+      - name: Set up uv
+        run: curl -LsSf https://astral.sh/uv/install.sh | sh
+      - run: uv sync
       - name: Build a binary wheel and a source tarball
-        run: |
-          python -m build --sdist --wheel --outdir dist/ .
-      - name: Publish distribution 📦 to PyPI
-        uses: pypa/gh-action-pypi-publish@release/v1
+        run: uv run python -m build --sdist --wheel --outdir dist/ .
+      - uses: pypa/gh-action-pypi-publish@release/v1
         with:
           password: ${{ secrets.PYPI_API_TOKEN }}
34 changes: 20 additions & 14 deletions .github/workflows/tests.yml
@@ -4,34 +4,40 @@ on:
   push:
     branches: [main]
   pull_request:
-    branches:
-      - main
-      - "**release**"

 jobs:
-  test:
+  pre-commit:
     runs-on: ubuntu-latest
+    if: github.event_name == 'pull_request' # pre-commit-ci/lite-action only runs here
     strategy:
       matrix:
-        python-version: ["3.11"]
-
+        python-version: ["3.10", "3.12"] # Our min and max supported Python versions
     steps:
       - uses: actions/checkout@v4
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
+      - uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
-          cache: pip
-      - run: pip install .[agents,google,dev,llms]
-      - name: Check pre-commit
-        run: pre-commit run --all-files || ( git status --short ; git diff ; exit 1 )
+      - uses: pre-commit/[email protected]
+      - uses: pre-commit-ci/[email protected]
+        if: always()
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.12"] # Our min and max supported Python versions
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up uv
+        run: |-
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          uv python pin ${{ matrix.python-version }}
+      - run: uv sync --python-preference=only-managed
       - uses: google-github-actions/auth@v2
         with:
          credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
-      - name: Run Test
+      - run: uv run pytest
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           SEMANTIC_SCHOLAR_API_KEY: ${{ secrets.SEMANTIC_SCHOLAR_API_KEY }}
           CROSSREF_API_KEY: ${{ secrets.CROSSREF_API_KEY }}
-        run: pytest
14 changes: 7 additions & 7 deletions .gitignore
@@ -100,6 +100,7 @@ fabric.properties
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets
.vscode

# Local History for Visual Studio Code
.history/
@@ -114,7 +115,6 @@ fabric.properties
# Icon must end with two \r
Icon[\r]


# Thumbnails
._*

@@ -294,12 +294,12 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/

*.ipynb
env
# Version files made by setuptools_scm
**/version.py

# Matching pyproject.toml
paperqa/version.py
tests/example*
# Tests
tests/*txt
tests/*html
tests/test_index/*
42 changes: 25 additions & 17 deletions .pre-commit-config.yaml
@@ -23,28 +23,16 @@ repos:
       - id: mixed-line-ending
       - id: trailing-whitespace
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.6.2
+    rev: v0.6.4
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]
   - repo: https://github.com/psf/black-pre-commit-mirror
-    rev: 24.4.2
+    rev: 24.8.0
     hooks:
       - id: black
-  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.10.1
-    hooks:
-      - id: mypy
-        args: [--pretty, --ignore-missing-imports]
-        additional_dependencies:
-          - numpy
-          - openai>=1 # Match pyproject.toml
-          - pydantic~=2.0 # Match pyproject.toml
-          - types-requests
-          - types-setuptools
-          - types-PyYAML
   - repo: https://github.com/rbubley/mirrors-prettier
-    rev: v3.3.2
+    rev: v3.3.3
     hooks:
       - id: prettier
   - repo: https://github.com/pappasam/toml-sort
@@ -63,12 +51,32 @@ repos:
             tests/stub_data.*
           )$
   - repo: https://github.com/abravalheri/validate-pyproject
-    rev: v0.18
+    rev: v0.19
     hooks:
       - id: validate-pyproject
         additional_dependencies:
           - "validate-pyproject-schema-store[all]>=2024.06.24" # Pin for Ruff's FURB154
+  - repo: https://github.com/astral-sh/uv-pre-commit
+    rev: 0.4.6
+    hooks:
+      - id: uv-lock
   - repo: https://github.com/adamchainz/blacken-docs
-    rev: v1.12.1
+    rev: 1.18.0
     hooks:
       - id: blacken-docs
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.11.2
+    hooks:
+      - id: mypy
+        args: [--pretty, --ignore-missing-imports]
+        additional_dependencies:
+          - aiohttp
+          - httpx
+          - numpy
+          - openai>=1 # Match pyproject.toml
+          - pydantic~=2.0 # Match pyproject.toml
+          - tenacity
+          - tiktoken>=0.4.0 # Match pyproject.toml
+          - types-requests
+          - types-setuptools
+          - types-PyYAML
1 change: 1 addition & 0 deletions .python-version
@@ -0,0 +1 @@
+3.12
10 changes: 1 addition & 9 deletions README.md
@@ -173,7 +173,6 @@ local_client = AsyncOpenAI(

 docs = Docs(
     client=local_client,
-    docs_index=NumpyVectorStore(embedding_model=LlamaEmbeddingModel()),
     texts_index=NumpyVectorStore(embedding_model=LlamaEmbeddingModel()),
     llm_model=OpenAILLMModel(
         config=dict(
@@ -201,15 +200,12 @@ docs = Docs(embedding="text-embedding-3-large")
 - `"hybrid-<model_name>"` i.e. `"hybrid-text-embedding-3-small"` to use a hybrid sparse keyword (based on a token modulo embedding) and dense vector embedding, any OpenAI or VoyageAI model can be used in the dense model name
 - `"sparse"` to use a sparse keyword embedding only

-For deeper embedding customization, embedding models and vector stores can be built separately and passed into the `Docs` object. Embedding models are used to create both paper-qa's index of document citation embedding vectors (`docs_index` argument) as well as the full-text embedding vectors (`texts_index` argument). They can both be specified as arguments when you create a new `Docs` object. You can use use any embedding model which implements paper-qa's `EmbeddingModel` class. For example, to use `text-embedding-3-large`:
+For deeper embedding customization, embedding models and vector stores can be built separately and passed into the `Docs` object. Embedding models are used to create paper-qa's index of the full-text embedding vectors (`texts_index` argument). They can both be specified as arguments when you create a new `Docs` object. You can use use any embedding model which implements paper-qa's `EmbeddingModel` class. For example, to use `text-embedding-3-large`:

 ```python
 from paperqa import Docs, NumpyVectorStore, OpenAIEmbeddingModel

 docs = Docs(
-    docs_index=NumpyVectorStore(
-        embedding_model=OpenAIEmbeddingModel(name="text-embedding-3-large")
-    ),
     texts_index=NumpyVectorStore(
         embedding_model=OpenAIEmbeddingModel(name="text-embedding-3-large")
     ),
@@ -224,7 +220,6 @@ from langchain_openai import OpenAIEmbeddings
 from paperqa import Docs, LangchainVectorStore

 docs = Docs(
-    docs_index=LangchainVectorStore(cls=FAISS, embedding_model=OpenAIEmbeddings()),
     texts_index=LangchainVectorStore(cls=FAISS, embedding_model=OpenAIEmbeddings()),
 )
 ```
@@ -243,7 +238,6 @@ local_client = AsyncOpenAI(

 docs = Docs(
     client=local_client,
-    docs_index=NumpyVectorStore(embedding_model=SentenceTransformerEmbeddingModel()),
     texts_index=NumpyVectorStore(embedding_model=SentenceTransformerEmbeddingModel()),
     llm_model=OpenAILLMModel(
         config=dict(
@@ -260,7 +254,6 @@ from paperqa import Docs, HybridEmbeddingModel, SparseEmbeddingModel, NumpyVecto

 model = HybridEmbeddingModel(models=[OpenAIEmbeddingModel(), SparseEmbeddingModel()])
 docs = Docs(
-    docs_index=NumpyVectorStore(embedding_model=model),
     texts_index=NumpyVectorStore(embedding_model=model),
 )
 ```
@@ -318,7 +311,6 @@ from langchain_openai import OpenAIEmbeddings

 docs = Docs(
     texts_index=LangchainVectorStore(cls=FAISS, embedding_model=OpenAIEmbeddings()),
-    docs_index=LangchainVectorStore(cls=FAISS, embedding_model=OpenAIEmbeddings()),
 )
 ```

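The embedding shortcut strings documented in the README hunk above all feed the same `embedding` argument shown in its hunk header (`docs = Docs(embedding="text-embedding-3-large")`). A minimal sketch of the hybrid and sparse variants, assuming only that `Docs` accepts those documented strings:

```python
from paperqa import Docs

# Hybrid: sparse keyword embedding combined with a dense OpenAI model,
# i.e. the `"hybrid-<model_name>"` form described in the README above.
hybrid_docs = Docs(embedding="hybrid-text-embedding-3-small")

# Sparse keyword embedding only.
sparse_docs = Docs(embedding="sparse")
```
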
30 changes: 9 additions & 21 deletions paperqa/__init__.py
@@ -1,29 +1,22 @@
from .docs import Answer, Context, Doc, Docs, PromptCollection, Text, print_callback
from .config import Settings, get_settings
from .docs import Answer, Docs, print_callback
from .llms import (
    AnthropicLLMModel,
    EmbeddingModel,
    HybridEmbeddingModel,
    LangchainEmbeddingModel,
    LangchainLLMModel,
    LangchainVectorStore,
    LlamaEmbeddingModel,
    LiteLLMEmbeddingModel,
    LiteLLMModel,
    LLMModel,
    LLMResult,
    NumpyVectorStore,
    OpenAIEmbeddingModel,
    OpenAILLMModel,
    SentenceTransformerEmbeddingModel,
    SparseEmbeddingModel,
    embedding_model_factory,
    llm_model_factory,
    vector_store_factory,
)
from .types import DocDetails
from .types import Context, Doc, DocDetails, Text
from .version import __version__

__all__ = [
    "Answer",
    "AnthropicLLMModel",
    "Context",
    "Doc",
    "DocDetails",
@@ -32,20 +25,15 @@
"HybridEmbeddingModel",
"LLMModel",
"LLMResult",
"LangchainEmbeddingModel",
"LangchainLLMModel",
"LangchainVectorStore",
"LlamaEmbeddingModel",
"LiteLLMEmbeddingModel",
"LiteLLMModel",
"NumpyVectorStore",
"OpenAIEmbeddingModel",
"OpenAILLMModel",
"PromptCollection",
"SentenceTransformerEmbeddingModel",
"Settings",
"SparseEmbeddingModel",
"Text",
"__version__",
"embedding_model_factory",
"llm_model_factory",
"get_settings",
"print_callback",
"vector_store_factory",
]
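
The new `from .config import Settings, get_settings` export is the core of this PR's config breakout. A minimal sketch of the new surface, assuming only the `Settings` and `get_settings` names exported above (their exact signatures are not shown in this diff):

```python
from paperqa import Settings, get_settings

# Assumption: calling get_settings() with no arguments resolves a default
# Settings instance; the commit "Made it possible to load named configs"
# suggests named configurations can also be loaded, but that call
# signature is not shown in this diff.
settings = get_settings()
assert isinstance(settings, Settings)
print(settings)
```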