KV-Cache (MHA, MLA): add missing start_layer / end_layer fields to MHATokenToKVPoolHost and MLATokenToKVPoolHost (#6016)

Simon-Li · 继优 · chus-chus · web-flow · commit b29a026e14b9 · 2025-05-09T15:50:06.000-07:00
Co-authored-by: 继优 <jiyou.ljy@alibaba-inc.com>
Co-authored-by: chus-chus <chus-chus@users.noreply.github.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
diff --git a/python/sglang/srt/mem_cache/memory_pool.py b/python/sglang/srt/mem_cache/memory_pool.py
@@ -762,6 +762,8 @@ def __init__(
             self.size = int(device_pool.size * host_to_device_ratio)
         # Align the host memory pool size to the page size
         self.size = self.size - (self.size % self.page_size)
+        self.start_layer = device_pool.start_layer
+        self.end_layer = device_pool.end_layer
 
         assert (
             self.size > device_pool.size