File tree Expand file tree Collapse file tree 2 files changed +8
-6
lines changed
docs/source/getting_started/installation/gpu Expand file tree Collapse file tree 2 files changed +8
-6
lines changed Original file line number Diff line number Diff line change @@ -23,6 +23,8 @@ Currently, there are no pre-built XPU wheels.
23
23
- Second, clone the vLLM repository and install the Python packages required to build the vLLM XPU backend:
24
24
25
25
``` console
26
+ git clone https://github.com/vllm-project/vllm.git
27
+ cd vllm
26
28
pip install --upgrade pip
27
29
pip install -v -r requirements/xpu.txt
28
30
```
Original file line number Diff line number Diff line change @@ -220,8 +220,8 @@ def forward(
220
220
value_cache ,
221
221
attn_metadata .slot_mapping .flatten (),
222
222
self .kv_cache_dtype ,
223
- layer ._k_scale ,
224
- layer ._v_scale ,
223
+ layer ._k_scale_float ,
224
+ layer ._v_scale_float ,
225
225
)
226
226
227
227
if attn_metadata .is_prompt :
@@ -306,8 +306,8 @@ def forward(
306
306
max_seq_len ,
307
307
self .alibi_slopes ,
308
308
self .kv_cache_dtype ,
309
- layer ._k_scale ,
310
- layer ._v_scale ,
309
+ layer ._k_scale_float ,
310
+ layer ._v_scale_float ,
311
311
)
312
312
else :
313
313
# Run PagedAttention V2.
@@ -339,8 +339,8 @@ def forward(
339
339
max_seq_len ,
340
340
self .alibi_slopes ,
341
341
self .kv_cache_dtype ,
342
- layer ._k_scale ,
343
- layer ._v_scale ,
342
+ layer ._k_scale_float ,
343
+ layer ._v_scale_float ,
344
344
)
345
345
346
346
# Reshape the output tensor.
You can’t perform that action at this time.
0 commit comments