Skip to content

Commit 54b9e5b

Browse files
author
颉沆
committed
Merge branch 'main' into dev/fix-dp-ffn-cuda-graph
2 parents: bf10e71 + 7d3a3d4 · commit 54b9e5b

File tree

7 files changed

+38
-16
lines changed

7 files changed

+38
-16
lines changed

.github/workflows/nightly-test-amd.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,12 +31,12 @@ jobs:
3131
DEVICE_FLAG="--device /dev/dri"
3232
fi
3333
touch github_summary.md
34-
docker pull ghcr.io/saienduri/sglang-aiter-v0.1.1:428
34+
docker pull lmsysorg/sglang:v0.4.6.post3-rocm630
3535
docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
3636
-v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
3737
--cap-add=SYS_PTRACE -e HF_TOKEN=${HF_TOKEN} --security-opt seccomp=unconfined \
3838
-w /sglang-checkout --name ci_sglang \
39-
ghcr.io/saienduri/sglang-aiter-v0.1.1:428
39+
lmsysorg/sglang:v0.4.6.post3-rocm630
4040
4141
- name: Install dependencies
4242
run: |

.github/workflows/pr-test-amd.yml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,12 @@ jobs:
3838
else
3939
DEVICE_FLAG="--device /dev/dri"
4040
fi
41-
docker pull ghcr.io/saienduri/sglang-aiter-v0.1.1:428
41+
docker pull lmsysorg/sglang:v0.4.6.post3-rocm630
4242
docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
4343
-v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
4444
--cap-add=SYS_PTRACE -e HF_TOKEN=${HF_TOKEN} --security-opt seccomp=unconfined \
4545
-w /sglang-checkout --name ci_sglang \
46-
ghcr.io/saienduri/sglang-aiter-v0.1.1:428
46+
lmsysorg/sglang:v0.4.6.post3-rocm630
4747
4848
- name: Install dependencies
4949
run: |
@@ -78,12 +78,12 @@ jobs:
7878
else
7979
DEVICE_FLAG="--device /dev/dri"
8080
fi
81-
docker pull ghcr.io/saienduri/sglang-aiter-v0.1.1:428
81+
docker pull lmsysorg/sglang:v0.4.6.post3-rocm630
8282
docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
8383
-v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
8484
--cap-add=SYS_PTRACE -e HF_TOKEN=${HF_TOKEN} --security-opt seccomp=unconfined \
8585
-w /sglang-checkout --name ci_sglang \
86-
ghcr.io/saienduri/sglang-aiter-v0.1.1:428
86+
lmsysorg/sglang:v0.4.6.post3-rocm630
8787
8888
- name: Install dependencies
8989
run: |
@@ -116,12 +116,12 @@ jobs:
116116
else
117117
DEVICE_FLAG="--device /dev/dri"
118118
fi
119-
docker pull ghcr.io/saienduri/sglang-aiter-v0.1.1:428
119+
docker pull lmsysorg/sglang:v0.4.6.post3-rocm630
120120
docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
121121
-v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
122122
--cap-add=SYS_PTRACE -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} --security-opt seccomp=unconfined \
123123
-w /sglang-checkout --name ci_sglang \
124-
ghcr.io/saienduri/sglang-aiter-v0.1.1:428
124+
lmsysorg/sglang:v0.4.6.post3-rocm630
125125
126126
- name: Install dependencies
127127
run: |
@@ -154,12 +154,12 @@ jobs:
154154
else
155155
DEVICE_FLAG="--device /dev/dri"
156156
fi
157-
docker pull ghcr.io/saienduri/sglang-aiter-v0.1.1:428
157+
docker pull lmsysorg/sglang:v0.4.6.post3-rocm630
158158
docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
159159
-v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
160160
--cap-add=SYS_PTRACE -e HF_TOKEN=${HF_TOKEN} --security-opt seccomp=unconfined \
161161
-w /sglang-checkout --name ci_sglang \
162-
ghcr.io/saienduri/sglang-aiter-v0.1.1:428
162+
lmsysorg/sglang:v0.4.6.post3-rocm630
163163
164164
- name: Install dependencies
165165
run: |
@@ -213,12 +213,12 @@ jobs:
213213
else
214214
DEVICE_FLAG="--device /dev/dri"
215215
fi
216-
docker pull ghcr.io/saienduri/sglang-aiter-v0.1.1:428
216+
docker pull lmsysorg/sglang:v0.4.6.post3-rocm630
217217
docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
218218
-v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
219219
--cap-add=SYS_PTRACE -e HF_TOKEN=${HF_TOKEN} --security-opt seccomp=unconfined \
220220
-w /sglang-checkout --name ci_sglang \
221-
ghcr.io/saienduri/sglang-aiter-v0.1.1:428
221+
lmsysorg/sglang:v0.4.6.post3-rocm630
222222
223223
- name: Install dependencies
224224
run: |
@@ -261,12 +261,12 @@ jobs:
261261
else
262262
DEVICE_FLAG="--device /dev/dri"
263263
fi
264-
docker pull ghcr.io/saienduri/sglang-aiter-v0.1.1:428
264+
docker pull lmsysorg/sglang:v0.4.6.post3-rocm630
265265
docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
266266
-v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
267267
--cap-add=SYS_PTRACE -e HF_TOKEN=${HF_TOKEN} --security-opt seccomp=unconfined \
268268
-w /sglang-checkout --name ci_sglang \
269-
ghcr.io/saienduri/sglang-aiter-v0.1.1:428
269+
lmsysorg/sglang:v0.4.6.post3-rocm630
270270
271271
- name: Install dependencies
272272
run: |

python/sglang/srt/entrypoints/http_server.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,11 @@ async def start_profile_async(obj: Optional[ProfileReqInput] = None):
338338
obj = ProfileReqInput()
339339

340340
await _global_state.tokenizer_manager.start_profile(
341-
obj.output_dir, obj.num_steps, obj.activities
341+
output_dir=obj.output_dir,
342+
num_steps=obj.num_steps,
343+
activities=obj.activities,
344+
with_stack=obj.with_stack,
345+
record_shapes=obj.record_shapes,
342346
)
343347
return Response(
344348
content="Start profiling.\n",

python/sglang/srt/layers/logits_processor.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,18 @@
4747
logger = logging.getLogger(__name__)
4848

4949

50+
from sglang.srt.layers.vocab_parallel_embedding import VocabParallelEmbedding
51+
from sglang.srt.managers.schedule_batch import global_server_args_dict
52+
from sglang.srt.model_executor.forward_batch_info import (
53+
CaptureHiddenMode,
54+
ForwardBatch,
55+
ForwardMode,
56+
)
57+
from sglang.srt.utils import dump_to_file
58+
59+
logger = logging.getLogger(__name__)
60+
61+
5062
@dataclasses.dataclass
5163
class LogitsProcessorOutput:
5264
## Part 1: This part will be assigned in python/sglang/srt/layers/logits_processor.py::LogitsProcessor

python/sglang/srt/managers/io_struct.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -836,6 +836,8 @@ class ProfileReqInput:
836836
# the caller doesn't need to run stop_profile.
837837
num_steps: Optional[int] = None
838838
activities: Optional[List[Literal["CPU", "GPU", "MEM", "CUDA_PROFILER"]]] = None
839+
with_stack: Optional[bool] = None
840+
record_shapes: Optional[bool] = None
839841

840842

841843
class ProfileReqType(Enum):

python/sglang/srt/managers/tokenizer_manager.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -747,12 +747,16 @@ async def start_profile(
747747
output_dir: Optional[str] = None,
748748
num_steps: Optional[int] = None,
749749
activities: Optional[List[str]] = None,
750+
with_stack: Optional[bool] = None,
751+
record_shapes: Optional[bool] = None,
750752
):
751753
req = ProfileReq(
752754
type=ProfileReqType.START_PROFILE,
753755
output_dir=output_dir,
754756
num_steps=num_steps,
755757
activities=activities,
758+
with_stack=with_stack,
759+
record_shapes=record_shapes,
756760
profile_id=str(time.time()),
757761
)
758762
result = (await self.start_profile_communicator(req))[0]

python/sglang/srt/server_args.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1205,7 +1205,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
12051205
type=int,
12061206
default=0,
12071207
help="The number of shared_experts need to be replicated to fuse with normal experts in deepseek v3/r1, "
1208-
"set it to tp_size can get best optimized performance.",
1208+
"set it to tp_size can get best optimized performance. Note that for architectures with SM==90, we have enabled the shared experts fusion optimization by default for DeepSeek V3/R1, with n_share_experts_fusion automatically set to the TP size.",
12091209
)
12101210
parser.add_argument(
12111211
"--disable-chunked-prefix-cache",

0 commit comments

Comments
 (0)