linkedin · hebiao064 · Jan 15, 2025 · Jan 15, 2025 · Jan 15, 2025 · Jan 15, 2025
diff --git a/benchmark/scripts/benchmark_rope.py b/benchmark/scripts/benchmark_rope.py
@@ -1,6 +1,8 @@
 import torch
 import triton
 
+from transformers import __version__ as transformers_version
+from transformers.models.llama.configuration_llama import LlamaConfig
 from transformers.models.llama.modeling_llama import LlamaRotaryEmbedding
 from transformers.models.llama.modeling_llama import apply_rotary_pos_emb
 from utils import QUANTILES
@@ -30,7 +32,13 @@ def bench_speed_rope(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOutput
     seq_len = extra_benchmark_config["seq_len"] if "seq_len" in extra_benchmark_config else input.x
 
     head_dim = hidden_size // num_q_heads
-    rotary_emb = LlamaRotaryEmbedding(head_dim, device=device)
+
+    if tuple(int(part) for part in transformers_version.split(".")[:2]) < (4, 48):
+        # Pre-4.48.0 constructor takes head_dim directly; version is compared numerically, not as a string
+        rotary_emb = LlamaRotaryEmbedding(head_dim, device=device)
+    else:
+        llama_config = LlamaConfig(head_dim=head_dim)
+        rotary_emb = LlamaRotaryEmbedding(llama_config, device=device)
     q = torch.randn(
         (1, seq_len, num_q_heads, head_dim),
         device=device,
@@ -105,7 +113,13 @@ def bench_memory_rope(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOutpu
     seq_len = extra_benchmark_config["seq_len"] if "seq_len" in extra_benchmark_config else input.x
 
     head_dim = hidden_size // num_q_heads
-    rotary_emb = LlamaRotaryEmbedding(head_dim, device=device)
+
+    if tuple(int(part) for part in transformers_version.split(".")[:2]) < (4, 48):
+        # Pre-4.48.0 constructor takes head_dim directly; version is compared numerically, not as a string
+        rotary_emb = LlamaRotaryEmbedding(head_dim, device=device)
+    else:
+        llama_config = LlamaConfig(head_dim=head_dim)
+        rotary_emb = LlamaRotaryEmbedding(llama_config, device=device)
     q = torch.randn(
         (1, seq_len, num_q_heads, head_dim),
         device=device,

diff --git a/test/transformers/test_rope.py b/test/transformers/test_rope.py
@@ -2,6 +2,8 @@
 import torch
 
 from test.utils import supports_bfloat16
+from transformers import __version__ as transformers_version
+from transformers.models.llama.configuration_llama import LlamaConfig
 from transformers.models.llama.modeling_llama import LlamaRotaryEmbedding
 from transformers.models.llama.modeling_llama import apply_rotary_pos_emb
 
@@ -57,7 +59,12 @@ def test_correctness(
     atol,
     rtol,
 ):
-    rotary_emb = LlamaRotaryEmbedding(head_dim, device=device)
+    if tuple(int(part) for part in transformers_version.split(".")[:2]) < (4, 48):
+        # Pre-4.48.0 constructor takes head_dim directly; version is compared numerically, not as a string
+        rotary_emb = LlamaRotaryEmbedding(head_dim, device=device)
+    else:
+        llama_config = LlamaConfig(head_dim=head_dim)
+        rotary_emb = LlamaRotaryEmbedding(llama_config, device=device)
 
     _tensor_q = torch.randn((bsz, seq_len, num_q_heads, head_dim), device=device).transpose(1, 2).to(dtype)
 
@@ -133,7 +140,12 @@ def test_functional_correctness(
     k1 = _k.clone().requires_grad_(True)
     k2 = _k.clone().requires_grad_(True)
 
-    rotary_emb = LlamaRotaryEmbedding(head_dim, device=device)
+    if tuple(int(part) for part in transformers_version.split(".")[:2]) < (4, 48):
+        # Pre-4.48.0 constructor takes head_dim directly; version is compared numerically, not as a string
+        rotary_emb = LlamaRotaryEmbedding(head_dim, device=device)
+    else:
+        llama_config = LlamaConfig(head_dim=head_dim)
+        rotary_emb = LlamaRotaryEmbedding(llama_config, device=device)
 
     pos_ids = torch.arange(seq_len, device=device, dtype=torch.long).unsqueeze(0)
     if expand_position_ids: