sgl-project · jinmingyi1998 · Jun 9, 2025 · Jun 9, 2025 · Fridge003 · Jun 9, 2025
@@ -406,8 +406,9 @@ def model_specific_adjustment(self):
                     f"Automatically turn of --chunked-prefill-size as it is not supported for "
                     f"{self.model_config.hf_config.model_type}"
                 )
-
-        if not self.use_mla_backend:
+        if server_args.disable_radix_cache:
+            server_args.disable_chunked_prefix_cache = True
+        elif not self.use_mla_backend:
             server_args.disable_chunked_prefix_cache = True
         elif self.page_size > 1:
             logger.info("Disable chunked prefix cache when page size > 1.")

@@ -1411,7 +1411,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
         parser.add_argument(
             "--disable-chunked-prefix-cache",
             action="store_true",
-            help="Disable chunked prefix cache feature for deepseek, which should save overhead for short sequences.",
+            help="For Deepseek, Disable chunked-prefix-cache to save overhead for short sequences.",
         )
         parser.add_argument(
             "--disable-fast-image-processor",