-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Ernie-4.5-vl-a3b模型转vllm之后A800卡正常部署A100卡报错 #1441
Description
1. 配置 --enforce-eager or not
2. 4 切片、5 切片
3. vllm 0.11.2 和 0.18.0
都报错
(EngineCore pid=121160) ERROR 03-30 20:05:25 [core.py:1099] File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/executor/multiproc_executor.py", line 190, in _init_executor
(EngineCore pid=121160) ERROR 03-30 20:05:25 [core.py:1099] self.workers = WorkerProc.wait_for_ready(unready_workers)
(EngineCore pid=121160) ERROR 03-30 20:05:25 [core.py:1099] File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/executor/multiproc_executor.py", line 731, in wait_for_ready
(EngineCore pid=121160) ERROR 03-30 20:05:25 [core.py:1099] raise e from None
(EngineCore pid=121160) ERROR 03-30 20:05:25 [core.py:1099] Exception: WorkerProc initialization failed due to an exception in a background process. See stack trace for root cause.
(EngineCore pid=121160) Process EngineCore:
(EngineCore pid=121160) Traceback (most recent call last):
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
(EngineCore pid=121160) self.run()
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/multiprocessing/process.py", line 108, in run
(EngineCore pid=121160) self._target(*self._args, **self._kwargs)
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 1103, in run_engine_core
(EngineCore pid=121160) raise e
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 1073, in run_engine_core
(EngineCore pid=121160) engine_core = EngineCoreProc(*args, engine_index=dp_rank, **kwargs)
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
(EngineCore pid=121160) return func(*args, **kwargs)
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 839, in __init__
(EngineCore pid=121160) super().__init__(
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 112, in __init__
(EngineCore pid=121160) self.model_executor = executor_class(vllm_config)
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/executor/multiproc_executor.py", line 101, in __init__
(EngineCore pid=121160) super().__init__(vllm_config)
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
(EngineCore pid=121160) return func(*args, **kwargs)
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/executor/abstract.py", line 103, in __init__
(EngineCore pid=121160) self._init_executor()
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/executor/multiproc_executor.py", line 190, in _init_executor
(EngineCore pid=121160) self.workers = WorkerProc.wait_for_ready(unready_workers)
(EngineCore pid=121160) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/executor/multiproc_executor.py", line 731, in wait_for_ready
(EngineCore pid=121160) raise e from None
(EngineCore pid=121160) Exception: WorkerProc initialization failed due to an exception in a background process. See stack trace for root cause.
(APIServer pid=120465) Traceback (most recent call last):
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/runpy.py", line 196, in _run_module_as_main
(APIServer pid=120465) return _run_code(code, main_globals, None,
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/runpy.py", line 86, in _run_code
(APIServer pid=120465) exec(code, run_globals)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 696, in <module>
(APIServer pid=120465) uvloop.run(run_server(args))
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/uvloop/__init__.py", line 82, in run
(APIServer pid=120465) return loop.run_until_complete(wrapper())
(APIServer pid=120465) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/uvloop/__init__.py", line 61, in wrapper
(APIServer pid=120465) return await main
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 656, in run_server
(APIServer pid=120465) await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 670, in run_server_worker
(APIServer pid=120465) async with build_async_engine_client(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/contextlib.py", line 199, in __aenter__
(APIServer pid=120465) return await anext(self.gen)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 103, in build_async_engine_client
(APIServer pid=120465) async with build_async_engine_client_from_engine_args(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/contextlib.py", line 199, in __aenter__
(APIServer pid=120465) return await anext(self.gen)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 144, in build_async_engine_client_from_engine_args
(APIServer pid=120465) async_llm = AsyncLLM.from_vllm_config(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/async_llm.py", line 225, in from_vllm_config
(APIServer pid=120465) return cls(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/async_llm.py", line 154, in __init__
(APIServer pid=120465) self.engine_core = EngineCoreClient.make_async_mp_client(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
(APIServer pid=120465) return func(*args, **kwargs)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/core_client.py", line 128, in make_async_mp_client
(APIServer pid=120465) return AsyncMPClient(*client_args)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/tracing/otel.py", line 178, in sync_wrapper
(APIServer pid=120465) return func(*args, **kwargs)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/core_client.py", line 924, in __init__
(APIServer pid=120465) super().__init__(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/core_client.py", line 583, in __init__
(APIServer pid=120465) with launch_core_engines(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/contextlib.py", line 142, in __exit__
(APIServer pid=120465) next(self.gen)
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/utils.py", line 972, in launch_core_engines
(APIServer pid=120465) wait_for_engine_startup(
(APIServer pid=120465) File "/root/paddlejob/workspace/env_run/code/a3b_mtp_deploy/vllm/lib/python3.10/site-packages/vllm/v1/engine/utils.py", line 1031, in wait_for_engine_startup
(APIServer pid=120465) raise RuntimeError(
(APIServer pid=120465) RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {}