@@ -338,34 +338,32 @@ static std::string FileFormatTokenizeID(int id, FileFormat file_format)
     }
 }
 
-static std::vector<int> TokenizeString(const std::string & str_to_tokenize, FileFormat file_format)
+static void TokenizeString(const std::string & str_to_tokenize, std::vector<int> & output_tokens, FileFormat file_format)
 {
-    std::vector<int> tokvec;
     if (file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 || file_format == FileFormat::GGJT_3 || file_format == FileFormat::GGUF_LLAMA)
     {
         if (file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2)
         {
-            tokvec = ::llama_v2_tokenize(llama_ctx_v2, str_to_tokenize, true);
+            output_tokens = ::llama_v2_tokenize(llama_ctx_v2, str_to_tokenize, true);
         }
         else if (file_format == FileFormat::GGML)
         {
-            tokvec = ::legacy_llama_v2_tokenize(llama_ctx_v2, str_to_tokenize, true);
+            output_tokens = ::legacy_llama_v2_tokenize(llama_ctx_v2, str_to_tokenize, true);
         }
         else if (file_format == FileFormat::GGJT_3)
         {
-            tokvec = ::llama_v3_tokenize(llama_ctx_v3, str_to_tokenize, true);
+            output_tokens = ::llama_v3_tokenize(llama_ctx_v3, str_to_tokenize, true);
         }
         else
         {
-            tokvec = ::llama_tokenize(llama_ctx_v4, str_to_tokenize, true);
+            output_tokens = ::llama_tokenize(llama_ctx_v4, str_to_tokenize, true);
         }
     }
     else
     {
         // tokenize the prompt
-        tokvec = ::gpt_tokenize(vocab, str_to_tokenize);
+        output_tokens = ::gpt_tokenize(vocab, str_to_tokenize);
     }
-    return tokvec;
 }
 
 static std::string RemoveBell(const std::string & input) // removes the bell character
@@ -1001,7 +999,8 @@ int gpttype_token_count(const std::string & input)
     {
         printf("\nFileFormat: %d, Tokenizing: %s",file_format,input.c_str());
     }
-    auto toks = TokenizeString(input, file_format);
+    std::vector<int> toks;
+    TokenizeString(input, toks, file_format);
     int tokcount = toks.size();
     if(debugmode==1)
     {
@@ -1063,7 +1062,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     }
 
     // tokenize the prompt
-    std::vector<int> embd_inp = TokenizeString(params.prompt, file_format);
+    std::vector<int> embd_inp;
+    TokenizeString(params.prompt, embd_inp, file_format);
 
     // truncate to front of the prompt if its too long
     int32_t nctx = params.n_ctx;
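
For context, a minimal standalone sketch of the out-parameter pattern that the new TokenizeString signature adopts; the backend tokenizer below is a hypothetical stub, not koboldcpp's actual llama/gpt tokenizers:

#include <cstdio>
#include <string>
#include <vector>

// Hypothetical stand-in for a backend tokenizer such as ::llama_tokenize.
// Real token ids come from the model vocabulary; here byte values are used.
static std::vector<int> stub_tokenize(const std::string & text)
{
    std::vector<int> toks;
    for (unsigned char c : text)
    {
        toks.push_back(static_cast<int>(c));
    }
    return toks;
}

// Mirrors the refactored signature: the caller supplies the vector and the
// function fills it, instead of returning a freshly constructed vector.
static void TokenizeString(const std::string & str_to_tokenize, std::vector<int> & output_tokens)
{
    output_tokens = stub_tokenize(str_to_tokenize);
}

int main()
{
    std::vector<int> toks;               // caller-owned buffer, as in gpttype_token_count above
    TokenizeString("hello world", toks); // filled in place; nothing is returned
    printf("token count: %zu\n", toks.size());
    return 0;
}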