We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 7416f68 commit 8b14fda — Copy full SHA for 8b14fda
vllm/v1/spec_decode/eagle.py
@@ -264,7 +264,9 @@ def compute_probs_and_sample_next_token(
264
# TODO(woosuk): Consider seeds.
265
q = torch.empty_like(probs)
266
q.exponential_()
267
- next_token_ids = probs.div_(q).argmax(dim=-1).view(-1)
+ # NOTE(woosuk): We shouldn't use `probs.div_(q)` because the draft_probs
268
+ # will be used later for rejection sampling.
269
+ next_token_ids = probs.div(q).argmax(dim=-1).view(-1)
270
if not sampling_metadata.all_random:
271
greedy_token_ids = probs.argmax(dim=-1)
272
next_token_ids = torch.where(
0 commit comments