Skip to content

Ernie-4.5-vl-a3b模型转vllm之后A800卡正常部署A100卡报错 #1441

@weishu20

Description

@weishu20

1.配置 --enforce-eager or not
2.4 切片、5 切片
3. vllm 0.11.2 和0.18.0
都报错

(EngineCore pid=121160) ERROR 03-30 20:05:25 [core.py:1099] File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/executor/multiproc_executor.py", line 190, in _init_executor
(EngineCore pid=121160) ERROR 03-30 20:05:25 [core.py:1099] self.workers = WorkerProc.wait_for_ready(unready_workers)
(EngineCore pid=121160) ERROR 03-30 20:05:25 [core.py:1099] File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/executor/multiproc_executor.py", line 731, in wait_for_ready
(EngineCore pid=121160) ERROR 03-30 20:05:25 [core.py:1099] raise e from None
(EngineCore pid=121160) ERROR 03-30 20:05:25 [core.py:1099] Exception: WorkerProc initialization failed due to an exception in a background process. See stack trace for root cause.
(EngineCore pid=121160) Process EngineCore:
(EngineCore pid=121160) Traceback (most recent call last):
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
(EngineCore pid=121160) self.run()
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/multiprocessing/process.py", line 108, in run
(EngineCore pid=121160) self._target(*self._args, **self._kwargs)
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 1103, in run_engine_core
(EngineCore pid=121160) raise e
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 1073, in run_engine_core
(EngineCore pid=121160) engine_core = EngineCoreProc(*args, engine_index=dp_rank, **kwargs)
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
(EngineCore pid=121160) return func(*args, **kwargs)
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 839, in __init__
(EngineCore pid=121160) super().__init__(
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 112, in __init__
(EngineCore pid=121160) self.model_executor = executor_class(vllm_config)
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/executor/multiproc_executor.py", line 101, in __init__
(EngineCore pid=121160) super().__init__(vllm_config)
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
(EngineCore pid=121160) return func(*args, **kwargs)
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/executor/abstract.py", line 103, in __init__
(EngineCore pid=121160) self._init_executor()
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/executor/multiproc_executor.py", line 190, in _init_executor
(EngineCore pid=121160) self.workers = WorkerProc.wait_for_ready(unready_workers)
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/executor/multiproc_executor.py", line 731, in wait_for_ready
(EngineCore pid=121160) raise e from None
(EngineCore pid=121160) Exception: WorkerProc initialization failed due to an exception in a background process. See stack trace for root cause.
(APIServer pid=120465) Traceback (most recent call last):
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/runpy.py", line 196, in _run_module_as_main
(APIServer pid=120465) return _run_code(code, main_globals, None,
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/runpy.py", line 86, in _run_code
(APIServer pid=120465) exec(code, run_globals)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 696, in <module>
(APIServer pid=120465) uvloop.run(run_server(args))
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/uvloop/__init__.py", line 82, in run
(APIServer pid=120465) return loop.run_until_complete(wrapper())
(APIServer pid=120465) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/uvloop/__init__.py", line 61, in wrapper
(APIServer pid=120465) return await main
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 656, in run_server
(APIServer pid=120465) await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 670, in run_server_worker
(APIServer pid=120465) async with build_async_engine_client(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/contextlib.py", line 199, in __aenter__
(APIServer pid=120465) return await anext(self.gen)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 103, in build_async_engine_client
(APIServer pid=120465) async with build_async_engine_client_from_engine_args(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/contextlib.py", line 199, in __aenter__
(APIServer pid=120465) return await anext(self.gen)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 144, in build_async_engine_client_from_engine_args
(APIServer pid=120465) async_llm = AsyncLLM.from_vllm_config(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/async_llm.py", line 225, in from_vllm_config
(APIServer pid=120465) return cls(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/async_llm.py", line 154, in __init__
(APIServer pid=120465) self.engine_core = EngineCoreClient.make_async_mp_client(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
(APIServer pid=120465) return func(*args, **kwargs)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/core_client.py", line 128, in make_async_mp_client
(APIServer pid=120465) return AsyncMPClient(*client_args)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
(APIServer pid=120465) return func(*args, **kwargs)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/core_client.py", line 924, in __init__
(APIServer pid=120465) super().__init__(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/core_client.py", line 583, in __init__
(APIServer pid=120465) with launch_core_engines(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/contextlib.py", line 142, in __exit__
(APIServer pid=120465) next(self.gen)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/utils.py", line 972, in launch_core_engines
(APIServer pid=120465) wait_for_engine_startup(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/utils.py", line 1031, in wait_for_engine_startup
(APIServer pid=120465) raise RuntimeError(
(APIServer pid=120465) RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions