Commit 3c076c3

Disable half2 for ExLlama when using HIP (#2912)
1 parent: ac0f96e
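This commit switches ExLlama's RMSNorm, RoPE, matmul, and SiLU kernels to their non-half2 variants whenever PyTorch reports a HIP (ROCm) build. The half2 paths use CUDA's packed paired-FP16 arithmetic, which presumably does not behave reliably under HIP, so the config flags below disable them on that platform.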

2 files changed: +13 −0 lines

modules/exllama.py

Lines changed: 8 additions & 0 deletions

@@ -1,6 +1,8 @@
 import sys
 from pathlib import Path
 
+from torch import version as torch_version
+
 from modules import shared
 from modules.logging_colors import logger
 
@@ -51,6 +53,12 @@ def from_pretrained(self, path_to_model):
         if shared.args.gpu_split:
             config.set_auto_map(shared.args.gpu_split)
             config.gpu_peer_fix = True
+        if torch_version.hip:
+            config.rmsnorm_no_half2 = True
+            config.rope_no_half2 = True
+            config.matmul_no_half2 = True
+            config.silu_no_half2 = True
+
 
         model = ExLlama(config)
         tokenizer = ExLlamaTokenizer(str(tokenizer_model_path))
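
The guard keys on PyTorch's build metadata rather than on the visible device: `torch.version.hip` is `None` on CUDA and CPU builds and a ROCm version string on HIP builds, so its truthiness identifies a HIP runtime. A quick sanity check (the printed version string is illustrative):

    import torch

    # None on CUDA/CPU builds; a ROCm version string on HIP builds.
    if torch.version.hip:
        print(f"HIP build detected (ROCm {torch.version.hip})")
    else:
        print("CUDA/CPU build; half2 kernels stay enabled")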

modules/exllama_hf.py

Lines changed: 5 additions & 0 deletions

@@ -97,6 +97,11 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
         if shared.args.gpu_split:
             config.set_auto_map(shared.args.gpu_split)
             config.gpu_peer_fix = True
+        if torch.version.hip:
+            config.rmsnorm_no_half2 = True
+            config.rope_no_half2 = True
+            config.matmul_no_half2 = True
+            config.silu_no_half2 = True
 
         # This slowes down a bit but align better with autogptq generation.
         # TODO: Should give user choice to tune the exllama config
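
The two files differ only in how they reach the flag: modules/exllama.py imports `version as torch_version` from torch, while modules/exllama_hf.py reads `torch.version.hip` off the already-imported torch module. Since both set the same four flags, the change could also be factored into a shared helper. A minimal sketch, assuming an ExLlamaConfig-like object exposing these attributes; the name `disable_half2_on_hip` is hypothetical and not part of the commit:

    import torch

    def disable_half2_on_hip(config):
        # Hypothetical helper, not part of this commit: applies the same
        # four flags that exllama.py and exllama_hf.py set inline above.
        # torch.version.hip is None on CUDA/CPU builds of PyTorch and a
        # ROCm version string on HIP builds.
        if torch.version.hip:
            config.rmsnorm_no_half2 = True
            config.rope_no_half2 = True
            config.matmul_no_half2 = True
            config.silu_no_half2 = True
        return config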
