Skip to content

Commit 3a12db2

Browse files
authored
Fixed spec timings to: accepted/tested instead of accepted/drafted (#14104)
1 parent ae92c18 commit 3a12db2

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

tools/server/server.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3556,16 +3556,16 @@ struct server_context {
 const llama_tokens & cached_text_tokens = slot.cache_tokens.get_text_tokens();
 llama_tokens draft = common_speculative_gen_draft(slot.spec, params_spec, cached_text_tokens, id);
 
-// keep track of total number of tokens generated in the draft
-slot.n_draft_total += draft.size();
-
 // ignore small drafts
 if (slot.params.speculative.n_min > (int) draft.size()) {
 SLT_DBG(slot, "ignoring small draft: %d < %d\n", (int) draft.size(), slot.params.speculative.n_min);
 
 continue;
 }
 
+// keep track of total number of drafted tokens tested
+slot.n_draft_total += draft.size();
+
 // construct the speculation batch
 common_batch_clear(slot.batch_spec);
 common_batch_add (slot.batch_spec, id, slot.n_past, { slot.id }, true);
@@ -3584,7 +3584,7 @@ struct server_context {
 slot.n_past += ids.size();
 slot.n_decoded += ids.size();
 
-// update how many tokens out of draft was accepted
+// update how many tokens out of those tested were accepted
 slot.n_draft_accepted += ids.size() - 1;
 
 slot.cache_tokens.push_back(id);

0 commit comments

Comments
 (0)