@@ -338,34 +338,32 @@ static std::string FileFormatTokenizeID(int id, FileFormat file_format)
     }
 }
 
-static std::vector<int> TokenizeString(const std::string & str_to_tokenize, FileFormat file_format)
+static void TokenizeString(const std::string & str_to_tokenize, std::vector<int> & output_tokens, FileFormat file_format)
 {
-    std::vector<int> tokvec;
     if (file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 || file_format == FileFormat::GGJT_3 || file_format == FileFormat::GGUF_LLAMA)
     {
         if (file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2)
         {
-            tokvec = ::llama_v2_tokenize(llama_ctx_v2, str_to_tokenize, true);
+            output_tokens = ::llama_v2_tokenize(llama_ctx_v2, str_to_tokenize, true);
         }
         else if (file_format == FileFormat::GGML)
         {
-            tokvec = ::legacy_llama_v2_tokenize(llama_ctx_v2, str_to_tokenize, true);
+            output_tokens = ::legacy_llama_v2_tokenize(llama_ctx_v2, str_to_tokenize, true);
         }
         else if (file_format == FileFormat::GGJT_3)
         {
-            tokvec = ::llama_v3_tokenize(llama_ctx_v3, str_to_tokenize, true);
+            output_tokens = ::llama_v3_tokenize(llama_ctx_v3, str_to_tokenize, true);
         }
         else
         {
-            tokvec = ::llama_tokenize(llama_ctx_v4, str_to_tokenize, true);
+            output_tokens = ::llama_tokenize(llama_ctx_v4, str_to_tokenize, true);
         }
     }
     else
     {
         // tokenize the prompt
-        tokvec = ::gpt_tokenize(vocab, str_to_tokenize);
+        output_tokens = ::gpt_tokenize(vocab, str_to_tokenize);
     }
-    return tokvec;
 }
 
 static std::string RemoveBell(const std::string & input) // removes the bell character
@@ -1001,7 +999,8 @@ int gpttype_token_count(const std::string & input)
     {
         printf("\nFileFormat: %d, Tokenizing: %s",file_format,input.c_str());
     }
-    auto toks = TokenizeString(input, file_format);
+    std::vector<int> toks;
+    TokenizeString(input, toks, file_format);
     int tokcount = toks.size();
     if(debugmode==1)
     {
@@ -1063,7 +1062,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     }
 
     // tokenize the prompt
-    std::vector<int> embd_inp = TokenizeString(params.prompt, file_format);
+    std::vector<int> embd_inp;
+    TokenizeString(params.prompt, embd_inp, file_format);
 
     // truncate to front of the prompt if its too long
     int32_t nctx = params.n_ctx;
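
For context, a minimal standalone sketch of the out-parameter pattern that the new TokenizeString signature adopts; the backend tokenizer below is a hypothetical stub, not koboldcpp's actual llama/gpt tokenizers:

#include <cstdio>
#include <string>
#include <vector>

// Hypothetical stand-in for a backend tokenizer such as ::llama_tokenize.
// Real token ids come from the model vocabulary; here byte values are used.
static std::vector<int> stub_tokenize(const std::string & text)
{
    std::vector<int> toks;
    for (unsigned char c : text)
    {
        toks.push_back(static_cast<int>(c));
    }
    return toks;
}

// Mirrors the refactored signature: the caller supplies the vector and the
// function fills it, instead of returning a freshly constructed vector.
static void TokenizeString(const std::string & str_to_tokenize, std::vector<int> & output_tokens)
{
    output_tokens = stub_tokenize(str_to_tokenize);
}

int main()
{
    std::vector<int> toks;               // caller-owned buffer, as in gpttype_token_count above
    TokenizeString("hello world", toks); // filled in place; nothing is returned
    printf("token count: %zu\n", toks.size());
    return 0;
}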