When loading a model with vLLM in the environment shown below, eval_vllm.py fails with the traceback at the end of this post. What could be wrong with the execution environment for eval_vllm.py?
from vllm import LLM, SamplingParams

llm = LLM(model="/home/filot/Data/models/fakeVLM",
          dtype="float16",
          max_model_len=4096,
          seed=42)
Environment:
OS: Ubuntu 24.04
Python: 3.12
vLLM: 0.11.0
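The listing above omits GPU and CUDA details, which are the first thing worth ruling out for this kind of startup failure. As a minimal sketch (assuming the default CUDA build of vLLM, which runs on PyTorch), a quick check run in the same conda env confirms whether a CUDA device is visible at all:

import torch

# If no CUDA device is visible from this environment, the vLLM engine
# core cannot start and a failure like the one below is expected.
print(torch.cuda.is_available())   # expect True
print(torch.version.cuda)          # CUDA version PyTorch was built against
print(torch.cuda.device_count())   # number of visible GPUs

The full traceback from the failing run: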
RuntimeError Traceback (most recent call last)
Cell In[3], line 1
----> 1 llm = LLM(model="/home/filot/Data/models/fakeVLM",
2 dtype="float16",
3 max_model_len=4096,
4 seed=42
5 )
File ~/Apps/miniforge3/envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/llm.py:297, in LLM.__init__(self, model, runner, convert, tokenizer, tokenizer_mode, skip_tokenizer_init, trust_remote_code, allowed_local_media_path, allowed_media_domains, tensor_parallel_size, dtype, quantization, revision, tokenizer_revision, seed, gpu_memory_utilization, swap_space, cpu_offload_gb, enforce_eager, disable_custom_all_reduce, hf_token, hf_overrides, mm_processor_kwargs, pooler_config, override_pooler_config, structured_outputs_config, kv_cache_memory_bytes, compilation_config, logits_processors, **kwargs)
294 log_non_default_args(engine_args)
296 # Create the Engine (autoselects V0 vs V1)
--> 297 self.llm_engine = LLMEngine.from_engine_args(
298 engine_args=engine_args, usage_context=UsageContext.LLM_CLASS)
299 self.engine_class = type(self.llm_engine)
301 self.request_counter = Counter()
File ~/Apps/miniforge3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/llm_engine.py:177, in LLMEngine.from_engine_args(cls, engine_args, usage_context, stat_loggers, enable_multiprocessing)
174 enable_multiprocessing = True
176 # Create the LLMEngine.
--> 177 return cls(vllm_config=vllm_config,
178 executor_class=executor_class,
179 log_stats=not engine_args.disable_log_stats,
180 usage_context=usage_context,
181 stat_loggers=stat_loggers,
182 multiprocess_mode=enable_multiprocessing)
File ~/Apps/miniforge3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/llm_engine.py:114, in LLMEngine.__init__(self, vllm_config, executor_class, log_stats, usage_context, stat_loggers, mm_registry, use_cached_outputs, multiprocess_mode)
111 self.output_processor.tracer = tracer
113 # EngineCore (gets EngineCoreRequests and gives EngineCoreOutputs)
--> 114 self.engine_core = EngineCoreClient.make_client(
115 multiprocess_mode=multiprocess_mode,
116 asyncio_mode=False,
117 vllm_config=vllm_config,
118 executor_class=executor_class,
119 log_stats=self.log_stats,
120 )
122 self.logger_manager: Optional[StatLoggerManager] = None
123 if self.log_stats:
File ~/Apps/miniforge3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core_client.py:80, in EngineCoreClient.make_client(multiprocess_mode, asyncio_mode, vllm_config, executor_class, log_stats)
76 return EngineCoreClient.make_async_mp_client(
77 vllm_config, executor_class, log_stats)
79 if multiprocess_mode and not asyncio_mode:
---> 80 return SyncMPClient(vllm_config, executor_class, log_stats)
82 return InprocClient(vllm_config, executor_class, log_stats)
File ~/Apps/miniforge3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core_client.py:602, in SyncMPClient.__init__(self, vllm_config, executor_class, log_stats)
600 def __init__(self, vllm_config: VllmConfig, executor_class: type[Executor],
601 log_stats: bool):
--> 602 super().__init__(
603 asyncio_mode=False,
604 vllm_config=vllm_config,
605 executor_class=executor_class,
606 log_stats=log_stats,
607 )
609 self.is_dp = self.vllm_config.parallel_config.data_parallel_size > 1
610 self.outputs_queue = queue.Queue[Union[EngineCoreOutputs, Exception]]()
File ~/Apps/miniforge3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core_client.py:448, in MPClient.__init__(self, asyncio_mode, vllm_config, executor_class, log_stats, client_addresses)
444 self.stats_update_address = client_addresses.get(
445 "stats_update_address")
446 else:
447 # Engines are managed by this client.
--> 448 with launch_core_engines(vllm_config, executor_class,
449 log_stats) as (engine_manager,
450 coordinator,
451 addresses):
452 self.resources.coordinator = coordinator
453 self.resources.engine_manager = engine_manager
File ~/Apps/miniforge3/envs/vllm/lib/python3.12/contextlib.py:144, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
142 if typ is None:
143 try:
--> 144 next(self.gen)
145 except StopIteration:
146 return False
File ~/Apps/miniforge3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/utils.py:732, in launch_core_engines(vllm_config, executor_class, log_stats, num_api_servers)
729 yield local_engine_manager, coordinator, addresses
731 # Now wait for engines to start.
--> 732 wait_for_engine_startup(
733 handshake_socket,
734 addresses,
735 engines_to_handshake,
736 parallel_config,
737 vllm_config.cache_config,
738 local_engine_manager,
739 coordinator.proc if coordinator else None,
740 )
File ~/Apps/miniforge3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/utils.py:785, in wait_for_engine_startup(handshake_socket, addresses, core_engines, parallel_config, cache_config, proc_manager, coord_process)
783 if coord_process is not None and coord_process.exitcode is not None:
784 finished[coord_process.name] = coord_process.exitcode
--> 785 raise RuntimeError("Engine core initialization failed. "
786 "See root cause above. "
787 f"Failed core proc(s): {finished}")
789 # Receive HELLO and READY messages from the input socket.
790 eng_identity, ready_msg_bytes = handshake_socket.recv_multipart()
RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {}
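Note that the final RuntimeError is only a wrapper: as its own message says, the root cause is printed above it by the EngineCore child process, and the empty Failed core proc(s): {} means no engine process reported an exit code back to the parent. In a notebook the child's output is easy to miss. One way to surface the underlying exception directly is to run the engine core in-process instead of in a child process. A debugging sketch, assuming vLLM 0.11's VLLM_ENABLE_V1_MULTIPROCESSING switch (this drives the multiprocess_mode / InprocClient branch visible in the traceback above):

import os

# Must be set before the engine is created; setting it before importing
# vllm is safest. With multiprocessing disabled, make_client returns an
# InprocClient, so the original exception propagates into this process.
os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"  # more verbose startup logs

from vllm import LLM

llm = LLM(model="/home/filot/Data/models/fakeVLM",
          dtype="float16",
          max_model_len=4096,
          seed=42)

With the engine in-process, the traceback should end at the actual failure (for example a CUDA initialization or model-loading error) rather than at wait_for_engine_startup.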