We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b200261 · commit 95ce7f4 — Copy full SHA for 95ce7f4
python/sglang/srt/layers/attention/flashattention_backend.py
@@ -523,7 +523,7 @@ def forward_extend(
523
# here is two side inclusive
524
window_size = (
525
(layer.sliding_window_size, 0)
526
- if layer.sliding_window_size is not None
+ if layer.sliding_window_size is not None and layer.sliding_window_size > -1
527
else (-1, -1)
528
)
529
k_descale, v_descale = None, None
@@ -664,7 +664,7 @@ def forward_decode(
664
665
666
667
668
669
670
causal = not layer.is_cross_attention
0 commit comments