Replace extend batch with idle batch in the disaggregated decode event loop

ch-wan · ch-wan · commit 95de7df75c80 · 2025-04-15T17:41:18.000Z
diff --git a/python/sglang/srt/managers/data_parallel_controller.py b/python/sglang/srt/managers/data_parallel_controller.py
@@ -23,8 +23,8 @@
 import setproctitle
 import zmq
 
-from python.sglang.srt.disaggregation.utils import DisaggregationMode
-from python.sglang.srt.managers.schedule_batch import Req
+from sglang.srt.disaggregation.utils import DisaggregationMode
+from sglang.srt.managers.schedule_batch import Req
 from sglang.srt.layers.dp_attention import compute_dp_attention_world_info
 from sglang.srt.managers.io_struct import (
     TokenizedEmbeddingReqInput,
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
@@ -710,24 +710,28 @@ def event_loop_normal_disagg_decode(self):
             self.process_decode_queue()
             batch = self.get_next_disagg_decode_batch_to_run()
             
+            extend_batch = None
+            if batch and batch.forward_mode.is_extend():
+                extend_batch = batch
+                batch = None
+            
             # Handle DP attention
             if self.server_args.enable_dp_attention or self.server_args.enable_sp_layernorm:
                 batch, _ = self.prepare_dp_attn_batch(batch)
-                
-            self.cur_batch = batch
+            
+            self.cur_batch = extend_batch if extend_batch else batch
+
+            # Generate fake extend output.
+            if extend_batch:
+                # Note: Logprobs should be handled on the prefill engine.
+                # FIXME: stream_output previously got one False per request; confirm a bare False is accepted.
+                self.stream_output(
+                    extend_batch.reqs, False
+                )
 
             if batch:
-                # Generate fake extend output.
-                if batch.forward_mode.is_extend():
-                    # Note: Logprobs should be handled on the prefill engine.
-                    self.stream_output(
-                        batch.reqs, [False for _ in range(len(batch.reqs))]
-                    )
-                    result = self.run_batch(batch)
-                    self.process_batch_result(batch, result)
-                else:
-                    result = self.run_batch(batch)
-                    self.process_batch_result(batch, result)
+                result = self.run_batch(batch)
+                self.process_batch_result(batch, result)
 
             if batch is None and (
                 len(self.disagg_decode_transfer_queue.queue)
@@ -738,7 +742,7 @@ def event_loop_normal_disagg_decode(self):
                 self.check_memory()
                 self.new_token_ratio = self.init_new_token_ratio
 
-            self.last_batch = batch
+            self.last_batch = extend_batch if extend_batch else batch
 
     def recv_requests(self) -> List[Req]:
         """Receive results at tp_rank = 0 and broadcast it to all other TP ranks."""