Skip to content

Commit 1f287cb

Browse files
shen-shanshan and dbyoung18
authored and committed
[V1][Structured Output] Clear xgrammar compiler object when engine core shut down to avoid nanobind leaked warning (vllm-project#16954)
Signed-off-by: shen-shanshan <[email protected]>
1 parent 9b810c9 commit 1f287cb

File tree

5 files changed

+17
-0
lines changed

5 files changed

+17
-0
lines changed

vllm/v1/engine/core.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ def step_with_batch_queue(self) -> Optional[EngineCoreOutputs]:
253253
return engine_core_outputs
254254

255255
def shutdown(self):
    """Release the resources held by this engine core.

    Clears the structured-output backend first, then shuts down the model
    executor when one is set. The executor shutdown sits in a ``finally``
    clause so that an exception raised while clearing the backend cannot
    skip it; that exception still propagates to the caller afterwards.
    """
    try:
        self.structured_output_manager.clear_backend()
    finally:
        # Runs even if the backend cleanup above failed, so the executor
        # is never left running because of an unrelated cleanup error.
        if self.model_executor:
            self.model_executor.shutdown()
258259

vllm/v1/structured_output/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,7 @@ def grammar_bitmask(
107107
# np.ndarray, because that is much more efficient for serialization
108108
# and deserialization when sending this to the GPU workers.
109109
return bitmask_tensor.numpy()
110+
111+
def clear_backend(self) -> None:
    """Ask the active backend, if any, to release what it holds.

    Does nothing when ``self.backend`` is ``None``; otherwise calls the
    backend's ``destroy()``.
    """
    backend = self.backend
    if backend is None:
        return
    backend.destroy()

vllm/v1/structured_output/backend_guidance.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,9 @@ def allocate_token_bitmask(self, max_num_seqs: int):
108108
return llguidance_torch.allocate_token_bitmask(
109109
max_num_seqs, self.ll_tokenizer.vocab_size)
110110

111+
def destroy(self):
    """No-op: this backend performs no cleanup in this method."""
    return None
113+
111114

112115
@dataclass
113116
class GuidanceGrammar(StructuredOutputGrammar):

vllm/v1/structured_output/backend_types.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,3 +87,9 @@ def allocate_token_bitmask(self, max_num_seqs: int):
8787
max_num_seqs (int): The maximum number of sequences for which
8888
to allocate the bitmask.
8989
"""
90+
91+
@abstractmethod
def destroy(self):
    """Perform backend-specific cleanup.

    Declared abstract, so each concrete backend supplies its own
    implementation.
    """

vllm/v1/structured_output/backend_xgrammar.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@ def compile_grammar(self, request_type: StructuredOutputOptions,
124124
def allocate_token_bitmask(self, max_num_seqs: int):
125125
return xgr.allocate_token_bitmask(max_num_seqs, self.vocab_size)
126126

127+
def destroy(self):
    """Drop this backend's reference to the xgrammar compiler.

    Safe to call more than once: if the ``compiler`` attribute has
    already been removed, the call does nothing.
    """
    try:
        del self.compiler
    except AttributeError:
        # Already destroyed (or never set). Cleanup may be requested
        # repeatedly during shutdown, so this is not an error.
        pass
129+
127130

128131
@dataclass
129132
class XgrammarGrammar(StructuredOutputGrammar):

0 commit comments

Comments
 (0)