Add property-based testing for vLLM endpoints using an API defined by an OpenAPI 3.1 schema (vllm-project#16721)

tarukumar · njhill · dbyoung18 · commit 07d0eedef540 · 2025-04-29T02:12:11.000Z
Signed-off-by: Tarun Kumar <takumar@redhat.com>
Signed-off-by: Nick Hill <nhill@redhat.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
@@ -118,7 +118,7 @@ steps:
   - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
   - VLLM_USE_V1=0 pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py  --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py  --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_openai_schema.py
   - pytest -v -s entrypoints/test_chat_utils.py
   - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
 
diff --git a/requirements/test.in b/requirements/test.in
@@ -37,6 +37,7 @@ lm-eval[api]==0.4.8 # required for model evaluation test
 transformers==4.51.1
 tokenizers==0.21.1
 huggingface-hub[hf_xet]>=0.30.0  # Required for Xet downloads.
+schemathesis>=3.39.15 # Required for openai schema test.
 # quantization
 bitsandbytes>=0.45.3
 buildkite-test-collector==0.1.9
diff --git a/requirements/test.txt b/requirements/test.txt
@@ -20,21 +20,29 @@ aiosignal==1.3.1
 annotated-types==0.7.0
     # via pydantic
 anyio==4.6.2.post1
-    # via httpx
+    # via
+    #   httpx
+    #   starlette
 argcomplete==3.5.1
     # via datamodel-code-generator
+arrow==1.3.0
+    # via isoduration
 attrs==24.2.0
     # via
     #   aiohttp
+    #   hypothesis
     #   jsonlines
     #   jsonschema
+    #   pytest-subtests
     #   referencing
 audioread==3.0.1
     # via librosa
 awscli==1.35.23
     # via -r requirements/test.in
 backoff==2.2.1
-    # via -r requirements/test.in
+    # via
+    #   -r requirements/test.in
+    #   schemathesis
 bitsandbytes==0.45.3
     # via -r requirements/test.in
 black==24.10.0
@@ -69,11 +77,13 @@ click==8.1.7
     #   jiwer
     #   nltk
     #   ray
+    #   schemathesis
     #   typer
 colorama==0.4.6
     # via
     #   awscli
     #   sacrebleu
+    #   schemathesis
     #   tqdm-multiprocess
 contourpy==1.3.0
     # via matplotlib
@@ -138,6 +148,8 @@ filelock==3.16.1
     #   transformers
 fonttools==4.54.1
     # via matplotlib
+fqdn==1.5.1
+    # via jsonschema
 frozendict==2.4.6
     # via einx
 frozenlist==1.5.0
@@ -156,16 +168,22 @@ genai-perf==0.0.8
     # via -r requirements/test.in
 genson==1.3.0
     # via datamodel-code-generator
+graphql-core==3.2.6
+    # via hypothesis-graphql
 h11==0.14.0
     # via httpcore
+harfile==0.3.0
+    # via schemathesis
 hf-xet==0.1.4
     # via huggingface-hub
 hiredis==3.0.0
     # via tensorizer
 httpcore==1.0.6
     # via httpx
 httpx==0.27.2
-    # via -r requirements/test.in
+    # via
+    #   -r requirements/test.in
+    #   schemathesis
 huggingface-hub==0.30.1
     # via
     #   -r requirements/test.in
@@ -180,17 +198,29 @@ huggingface-hub==0.30.1
     #   vocos
 humanize==4.11.0
     # via runai-model-streamer
+hypothesis==6.131.0
+    # via
+    #   hypothesis-graphql
+    #   hypothesis-jsonschema
+    #   schemathesis
+hypothesis-graphql==0.11.1
+    # via schemathesis
+hypothesis-jsonschema==0.23.1
+    # via schemathesis
 idna==3.10
     # via
     #   anyio
     #   email-validator
     #   httpx
+    #   jsonschema
     #   requests
     #   yarl
 inflect==5.6.2
     # via datamodel-code-generator
 iniconfig==2.0.0
     # via pytest
+isoduration==20.11.0
+    # via jsonschema
 isort==5.13.2
     # via datamodel-code-generator
 jinja2==3.1.6
@@ -210,12 +240,18 @@ joblib==1.4.2
     #   scikit-learn
 jsonlines==4.0.0
     # via lm-eval
+jsonpointer==3.0.0
+    # via jsonschema
 jsonschema==4.23.0
     # via
+    #   hypothesis-jsonschema
     #   mistral-common
     #   ray
+    #   schemathesis
 jsonschema-specifications==2024.10.1
     # via jsonschema
+junit-xml==1.9
+    # via schemathesis
 kaleido==0.2.1
     # via genai-perf
 kiwisolver==1.4.7
@@ -239,7 +275,9 @@ mamba-ssm==2.2.4
 markdown-it-py==3.0.0
     # via rich
 markupsafe==3.0.2
-    # via jinja2
+    # via
+    #   jinja2
+    #   werkzeug
 matplotlib==3.9.2
     # via -r requirements/test.in
 mbstrdecoder==1.1.3
@@ -449,6 +487,8 @@ pygments==2.18.0
     # via rich
 pyparsing==3.2.0
     # via matplotlib
+pyrate-limiter==3.7.0
+    # via schemathesis
 pytablewriter==1.2.0
     # via lm-eval
 pytest==8.3.3
@@ -461,7 +501,9 @@ pytest==8.3.3
     #   pytest-mock
     #   pytest-rerunfailures
     #   pytest-shard
+    #   pytest-subtests
     #   pytest-timeout
+    #   schemathesis
 pytest-asyncio==0.24.0
     # via -r requirements/test.in
 pytest-forked==1.6.0
@@ -472,10 +514,13 @@ pytest-rerunfailures==14.0
     # via -r requirements/test.in
 pytest-shard==0.1.2
     # via -r requirements/test.in
+pytest-subtests==0.14.1
+    # via schemathesis
 pytest-timeout==2.3.1
     # via -r requirements/test.in
 python-dateutil==2.9.0.post0
     # via
+    #   arrow
     #   botocore
     #   matplotlib
     #   pandas
@@ -497,6 +542,7 @@ pyyaml==6.0.2
     #   peft
     #   ray
     #   responses
+    #   schemathesis
     #   timm
     #   transformers
     #   vocos
@@ -527,10 +573,16 @@ requests==2.32.3
     #   pooch
     #   ray
     #   responses
+    #   schemathesis
+    #   starlette-testclient
     #   tiktoken
     #   transformers
 responses==0.25.3
     # via genai-perf
+rfc3339-validator==0.1.4
+    # via jsonschema
+rfc3987==1.3.8
+    # via jsonschema
 rich==13.9.4
     # via
     #   genai-perf
@@ -559,6 +611,8 @@ safetensors==0.4.5
     #   peft
     #   timm
     #   transformers
+schemathesis==3.39.15
+    # via -r requirements/test.in
 scikit-learn==1.5.2
     # via
     #   librosa
@@ -584,12 +638,16 @@ shellingham==1.5.4
     # via typer
 six==1.16.0
     # via
+    #   junit-xml
     #   python-dateutil
+    #   rfc3339-validator
     #   rouge-score
 sniffio==1.3.1
     # via
     #   anyio
     #   httpx
+sortedcontainers==2.4.0
+    # via hypothesis
 soundfile==0.12.1
     # via
     #   -r requirements/test.in
@@ -598,6 +656,12 @@ soxr==0.5.0.post1
     # via librosa
 sqlitedict==2.1.0
     # via lm-eval
+starlette==0.46.2
+    # via
+    #   schemathesis
+    #   starlette-testclient
+starlette-testclient==0.4.1
+    # via schemathesis
 statsmodels==0.14.4
     # via genai-perf
 sympy==1.13.1
@@ -628,6 +692,10 @@ tokenizers==0.21.1
     # via
     #   -r requirements/test.in
     #   transformers
+tomli==2.2.1
+    # via schemathesis
+tomli-w==1.2.0
+    # via schemathesis
 torch==2.6.0
     # via
     #   -r requirements/test.in
@@ -693,6 +761,8 @@ typepy==1.3.2
     #   tabledata
 typer==0.15.2
     # via fastsafetensors
+types-python-dateutil==2.9.0.20241206
+    # via arrow
 typing-extensions==4.12.2
     # via
     #   huggingface-hub
@@ -705,6 +775,8 @@ typing-extensions==4.12.2
     #   typer
 tzdata==2024.2
     # via pandas
+uri-template==1.3.0
+    # via jsonschema
 urllib3==2.2.3
     # via
     #   blobfile
@@ -716,13 +788,19 @@ vector-quantize-pytorch==1.21.2
     # via -r requirements/test.in
 vocos==0.1.0
     # via -r requirements/test.in
+webcolors==24.11.1
+    # via jsonschema
+werkzeug==3.1.3
+    # via schemathesis
 word2number==1.1
     # via lm-eval
 xxhash==3.5.0
     # via
     #   datasets
     #   evaluate
 yarl==1.17.1
-    # via aiohttp
+    # via
+    #   aiohttp
+    #   schemathesis
 zstandard==0.23.0
     # via lm-eval
diff --git a/tests/entrypoints/openai/test_openai_schema.py b/tests/entrypoints/openai/test_openai_schema.py
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: Apache-2.0
+import pytest
+import schemathesis
+from schemathesis import GenerationConfig
+
+from ...utils import RemoteOpenAIServer
+
+schemathesis.experimental.OPEN_API_3_1.enable()
+
+MODEL_NAME = "HuggingFaceTB/SmolVLM-256M-Instruct"
+MAXIMUM_IMAGES = 2
+
+
+@pytest.fixture(scope="module")
+def server():
+    args = [
+        "--task",
+        "generate",
+        "--max-model-len",
+        "2048",
+        "--max-num-seqs",
+        "5",
+        "--enforce-eager",
+        "--trust-remote-code",
+        "--limit-mm-per-prompt",
+        f"image={MAXIMUM_IMAGES}",
+    ]
+
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+        yield remote_server
+
+
+@pytest.fixture(scope="module")
+def get_schema(server):
+    # avoid generating null (\x00) bytes in strings during test case generation
+    return schemathesis.openapi.from_uri(
+        f"{server.url_root}/openapi.json",
+        generation_config=GenerationConfig(allow_x00=False),
+    )
+
+
+schema = schemathesis.from_pytest_fixture("get_schema")
+
+
+@schema.parametrize()
+@schema.override(headers={"Content-Type": "application/json"})
+async def test_openapi_stateless(case):
+    # No need to verify the SSL certificate for localhost
+    await case.call_and_validate(verify=False)