Skip to content

Commit 19178fa

Browse files
committed
2048 context all core
1 parent 7e6f4ed commit 19178fa

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

llama_for_kobold.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,12 @@ class generation_outputs(ctypes.Structure):
3737
handle.generate.argtypes = [generation_inputs, ctypes.c_wchar_p] # apparently needed for OSX to work; I don't know why it needs to interpret it that way, but whatever
3838
handle.generate.restype = generation_outputs
3939

40-
def load_model(model_filename,batch_size=8,max_context_length=512,threads=4,n_parts_overwrite=-1):
40+
def load_model(model_filename,batch_size=8,max_context_length=2048,n_parts_overwrite=-1):
4141
inputs = load_model_inputs()
4242
inputs.model_filename = model_filename.encode("UTF-8")
4343
inputs.batch_size = batch_size
4444
inputs.max_context_length = max_context_length
45-
inputs.threads = threads
45+
inputs.threads = os.cpu_count()
4646
inputs.n_parts_overwrite = n_parts_overwrite
4747
ret = handle.load_model(inputs)
4848
return ret
@@ -74,7 +74,7 @@ def generate(prompt,max_length=20,temperature=0.8,top_k=100,top_p=0.85,rep_pen=1
7474
# global vars
7575
global friendlymodelname
7676
friendlymodelname = ""
77-
maxctx = 512
77+
maxctx = 2048
7878
maxlen = 128
7979
modelbusy = False
8080
port = 5001
@@ -265,7 +265,7 @@ def stop(self):
265265
mdl_nparts += 1
266266
modelname = os.path.abspath(sys.argv[1])
267267
print("Loading model: " + modelname)
268-
loadok = load_model(modelname,24,maxctx,4,mdl_nparts)
268+
loadok = load_model(modelname,24,maxctx,mdl_nparts)
269269
print("Load Model OK: " + str(loadok))
270270

271271
#friendlymodelname = Path(modelname).stem ### this won't work on the local Kobold API, so we must hardcode a known HF model name

0 commit comments

Comments
 (0)