
Commit ad7d3b6

docker vllm: add new configs (#1506)
docker vllm: add new configs

Signed-off-by: Tomasz Thaddey <[email protected]>
1 parent f865175 commit ad7d3b6

File tree

11 files changed: +334 -107 lines changed

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+deepSeek-R1-Distill-Llama-70B:
+  MODEL: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
+
+llama31_8b_instruct:
+  MODEL: meta-llama/Llama-3.1-8B-Instruct
+
+llama31_70b_instruct:
+  MODEL: meta-llama/Llama-3.1-70B-Instruct
+
+llama31_405b_instruct:
+  MODEL: meta-llama/Llama-3.1-405B-Instruct
+
+llama32_1b_instruct:
+  MODEL: meta-llama/Llama-3.2-1B-Instruct
+
+llama32_3b_instruct:
+  MODEL: meta-llama/Llama-3.2-3B-Instruct
+
+llama33_70b_instruct:
+  MODEL: meta-llama/Llama-3.3-70B-Instruct
+
+mistral_7b_instruct_v02:
+  MODEL: mistralai/Mistral-7B-Instruct-v0.2
+
+mixtral_8x22b_instruct_v01:
+  MODEL: mistralai/Mixtral-8x22B-Instruct-v0.1
+
+mixtral_8x7b_instruct_v01:
+  MODEL: mistralai/Mixtral-8x7B-Instruct-v0.1
+
+qwen25_14b_instruct:
+  MODEL: Qwen/Qwen2.5-14B-Instruct
+
+qwen25_32b_instruct:
+  MODEL: Qwen/Qwen2.5-32B-Instruct
+
+qwen25_72b_instruct:
+  MODEL: Qwen/Qwen2.5-72B-Instruct
+
+qwen25_7b_instruct:
+  MODEL: Qwen/Qwen2.5-7B-Instruct
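
Each top-level key in this new file is a named configuration that the entrypoint can be pointed at via --config-name (wired through VLLM_BENCHMARK_CONFIG_NAME in docker-compose.yml below). A minimal sketch of how such a section resolves to its MODEL value; the file name model_configs.yaml is a placeholder used only for illustration, not the actual path in the repository:

# Minimal sketch: resolve a named section from a config file shaped like the one above.
# "model_configs.yaml" is a hypothetical path for illustration.
import yaml

def resolve_model(config_path: str, config_name: str) -> str:
    with open(config_path) as f:
        config = yaml.safe_load(f)
    section = config.get(config_name)
    if not isinstance(section, dict) or "MODEL" not in section:
        raise ValueError(f"Section '{config_name}' missing or has no MODEL key")
    return section["MODEL"]

# Example: resolve_model("model_configs.yaml", "qwen25_7b_instruct")
# would return "Qwen/Qwen2.5-7B-Instruct".
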
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+llama32-11B-Vision-Instruct:
+  MODEL: meta-llama/Llama-3.2-11B-Vision-Instruct
+
+llama32-90B-Vision-Instruct:
+  MODEL: meta-llama/Llama-3.2-90B-Vision-Instruct

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+defaults_text:
+  MODELS:
+    - deepseek-ai/DeepSeek-R1-Distill-Llama-70B
+    - meta-llama/Llama-3.1-70B-Instruct
+    - meta-llama/Llama-3.1-405B-Instruct
+    - meta-llama/Llama-3.1-8B-Instruct
+    - meta-llama/Llama-3.2-1B-Instruct
+    - meta-llama/Llama-3.2-3B-Instruct
+    - meta-llama/Llama-3.3-70B-Instruct
+    - mistralai/Mistral-7B-Instruct-v0.2
+    - mistralai/Mixtral-8x22B-Instruct-v0.1
+    - mistralai/Mixtral-8x7B-Instruct-v0.1
+    - Qwen/Qwen2.5-14B-Instruct
+    - Qwen/Qwen2.5-32B-Instruct
+    - Qwen/Qwen2.5-72B-Instruct
+    - Qwen/Qwen2.5-7B-Instruct
+  DATASET: /workspace/vllm/benchmarks/sonnet.txt
+  DATASET_NAME: sonnet
+  BACKEND: vllm
+  INPUT_TOK: 2048
+  OUTPUT_TOK: 2048
+  CONCURRENT_REQ: 64
+  NUM_PROMPTS: 640
+  MAX_MODEL_LEN: 4352
+  PREFIX_LEN: 100
+
+defaults_vision:
+  MODELS:
+    - meta-llama/Llama-3.2-11B-Vision-Instruct
+    - meta-llama/Llama-3.2-90B-Vision-Instruct
+  DATASET: lmarena-ai/vision-arena-bench-v0.1
+  DATASET_NAME: hf
+  BACKEND: openai-chat
+  CONCURRENT_REQ: 64
+  NUM_PROMPTS: 500
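
These defaults_* sections group models with the benchmark settings they share; the entrypoint picks the section whose MODELS list contains the selected model and copies the remaining keys in as defaults. A simplified, standalone sketch of that matching (it returns on the first match, whereas the real _load_env_from_defaults keeps scanning all sections):

# Simplified sketch of defaults matching: pick the defaults_* section whose
# MODELS list contains the chosen model and return its other keys as defaults.
import yaml

def match_defaults(defaults_path: str, model: str) -> dict:
    with open(defaults_path) as f:
        config = yaml.safe_load(f) or {}
    for name, section in config.items():
        if name.startswith("defaults_") and isinstance(section, dict):
            if model in section.get("MODELS", []):
                return {k: v for k, v in section.items() if k != "MODELS"}
    return {}

# Example: match_defaults("benchmark_configurations/defaults.yaml",
#                         "Qwen/Qwen2.5-7B-Instruct")
# would return the defaults_text settings (DATASET, BACKEND, INPUT_TOK, ...).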

.cd/benchmark_configurations/llama8b.yaml

Lines changed: 0 additions & 27 deletions
This file was deleted.
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+variables:
+  - MODEL
+  - INPUT_TOK
+  - OUTPUT_TOK
+  - CON_REQ
+  - NUM_PROMPTS
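
This variables list names the settings a user may override from the shell at benchmark time: any listed variable that is set in the environment replaces the value loaded from the config. A rough sketch of that override step, reusing the path the entrypoint reads:

# Rough sketch: apply user overrides from the environment for the variables
# whitelisted in user_vars.yaml (environment values arrive as strings).
import os
import yaml

def apply_user_overrides(
        config_envs: dict,
        user_vars_path: str = "benchmark_configurations/user_vars.yaml") -> dict:
    with open(user_vars_path) as f:
        user_vars = yaml.safe_load(f) or {}
    for var in user_vars.get("variables", []):
        if var in os.environ:
            config_envs[var] = os.environ[var]  # environment wins for whitelisted keys
    return config_envs

# Example: with INPUT_TOK=1024 exported, apply_user_overrides({"INPUT_TOK": 2048})
# returns {"INPUT_TOK": "1024"}.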

.cd/docker-compose.yml

Lines changed: 16 additions & 1 deletion
@@ -2,10 +2,20 @@ services:
   vllm-server:
     image: ${DOCKER_IMAGE}
     environment:
+      - MODEL
       - HF_HOME=/mnt/hf_cache
-      - HF_TOKEN=${HF_TOKEN}
+      - HF_TOKEN
       - HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES:-all}
       - PYTHONUNBUFFERED=1
+      - PT_HPU_LAZY_MODE
+      - VLLM_DECODE_BLOCK_BUCKET_STEP
+      - VLLM_DECODE_BS_BUCKET_STEP
+      - VLLM_PROMPT_BS_BUCKET_STEP
+      - VLLM_PROMPT_SEQ_BUCKET_STEP
+      - VLLM_SKIP_WARMUP
+      - MAX_MODEL_LEN
+      - MAX_NUM_SEQS
+      - TENSOR_PARALLEL_SIZE
     volumes:
       - /mnt/hf_cache:/mnt/hf_cache
     ports:
@@ -31,6 +41,11 @@ services:
       vllm-server:
         condition: service_healthy
     environment:
+      - MODEL
       - HF_TOKEN=${HF_TOKEN}
       - PYTHONUNBUFFERED=1
+      - INPUT_TOK
+      - OUTPUT_TOK
+      - CON_REQ
+      - NUM_PROMPTS
     command: ["benchmark", "--config-file", "${VLLM_BENCHMARK_CONFIG_FILE}", "--config-name", "${VLLM_BENCHMARK_CONFIG_NAME}"]

.cd/entrypoints/entrypoint_main.py

Lines changed: 142 additions & 32 deletions
@@ -29,60 +29,166 @@ def __init__(self, mode="server", config_file=None, config_name=None):
                   file=sys.stderr)
             sys.exit(1)
 
-    def _load_env_from_config(self):
+    def _load_env_from_defaults(self):
+        """
+        Loads default environment variables from a YAML file based on the mode.
+        For each section starting with 'defaults_', if model is in the section's
+        'MODELS' list, loads the environment variables from that section.
+        If no section matches, loads nothing.
+        If the file does not exist, it returns an empty dictionary.
+        """
+        defaults_file = ("server_configurations/defaults.yaml"
+                         if self.mode == "server" else
+                         "benchmark_configurations/defaults.yaml")
+        try:
+            with open(defaults_file) as f:
+                config = yaml.safe_load(f)
+            found = False
+            for section_name, section in config.items():
+                if section_name.startswith("defaults_") and isinstance(
+                        section, dict):
+                    models = section.get("MODELS", [])
+                    if (isinstance(models, list)
+                            and self.config_envs.get("MODEL") in models):
+                        env_vars = {
+                            k: v
+                            for k, v in section.items() if k != "MODELS"
+                        }
+                        self.config_envs.update(env_vars)
+                        print(
+                            f"[INFO] Loaded default configuration section "
+                            f"'{section_name}' for model "
+                            f"'{self.config_envs.get('MODEL')}' from file: "
+                            f"{defaults_file}")
+                        for key, value in env_vars.items():
+                            print(f" {key}: {value}")
+                        found = True
+            if not found:
+                print(f"[WARNING] No defaults section found for model "
+                      f"'{self.config_envs.get('MODEL')}' in "
+                      f"'{defaults_file}'.")
+        except FileNotFoundError:
+            print(f"[WARNING] Defaults file '{defaults_file}' not found. "
+                  "No defaults loaded.")
+        except Exception as e:
+            print(
+                f"[ERROR] Failed to load defaults: {e}",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+
+    def _load_env_from_config_file(self):
+        """
+        Loads a specific configuration section from a YAML file and updates the
+        current environment configuration with the values from that section.
+        If a key already exists (e.g., from defaults), it will be overwritten
+        by the value from the file. Exits the program with an error message if
+        the section is missing or invalid, or if the file cannot be read.
+
+        Raises:
+            SystemExit: If the configuration file or section is missing,
+                invalid, or cannot be loaded.
+        """
         try:
             with open(self.config_file) as f:
                 config = yaml.safe_load(f)
             section = config.get(self.config_name)
-            if section is None:
-                print(
-                    f"[ERROR] Section '{self.config_name}' not found in "
-                    f"'{self.config_file}'.",
-                    file=sys.stderr)
-                sys.exit(1)
-            if not isinstance(section, dict):
+            if section is None or not isinstance(section, dict):
                 print(
-                    f"[ERROR] Section '{self.config_name}' is not a "
-                    f"dictionary in '{self.config_file}'.",
-                    file=sys.stderr)
+                    f"[ERROR] Section '{self.config_name}' not found or "
+                    f"is not a dictionary in '{self.config_file}'.",
+                    file=sys.stderr,
+                )
                 sys.exit(1)
-            self.config_envs = section
-            print(f"[INFO] Loaded configuration from file: "
-                  f"{self.config_file}, section: {self.config_name}")
-            print("[INFO] The following parameters and values were loaded "
-                  "from the config file:")
-            for key, value in self.config_envs.items():
+            print(f"[INFO] Loaded configuration section "
+                  f"'{self.config_name}' from file: {self.config_file}")
+            for key, value in section.items():
                 print(f" {key}: {value}")
+            return section
         except Exception as e:
-            print(f"[ERROR] Failed to load config: {e}", file=sys.stderr)
+            print(
+                f"[ERROR] Failed to load config: {e}",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+
+    def _update_benchmark_envs_from_user_vars(self):
+        """
+        Loads a list of variable names from a YAML file and, for each variable
+        present in the current environment, updates the internal configuration
+        dictionary with the environment value. If the YAML file is missing or
+        empty, no variables are updated.
+        """
+        user_vars_file = "benchmark_configurations/user_vars.yaml"
+        try:
+            with open(user_vars_file) as f:
+                user_vars = yaml.safe_load(f)
+            if user_vars and isinstance(user_vars, dict):
+                variables = user_vars.get("variables", [])
+                for var in variables:
+                    if var in os.environ:
+                        self.config_envs[var] = os.environ[var]
+                        print(f"[INFO] Overwriting {var} with value from "
+                              f"environment: {self.config_envs[var]}")
+            else:
+                print(f"[WARNING] No user-defined variables found in "
+                      f"'{user_vars_file}'.")
+        except FileNotFoundError:
+            print(
+                f"[WARNING] User variables file '{user_vars_file}' not found. "
+                "No user-defined variables loaded.")
+        except Exception as e:
+            print(
+                f"[ERROR] Failed to load user-defined variables: {e}",
+                file=sys.stderr,
+            )
             sys.exit(1)
 
     def run(self):
+        model_conf = {}
         if self.config_file and self.config_name:
-            self._load_env_from_config()
+            model_conf = self._load_env_from_config_file()
+            if "MODEL" in model_conf:
+                self.config_envs["MODEL"] = model_conf["MODEL"]
+
+        env_model = os.environ.get("MODEL")
+        if env_model:
+            self.config_envs["MODEL"] = env_model
+
+        if not self.config_envs.get("MODEL"):
+            print("[ERROR] MODEL is not set. Exiting.", file=sys.stderr)
+            sys.exit(1)
+
+        self._load_env_from_defaults()
+
+        if model_conf:
+            self.config_envs.update(model_conf)
 
         if self.mode == "server":
             print("[INFO] Starting container in server mode.")
-            # VarsGenerator will read variables from the environment
             for key, value in self.config_envs.items():
                 os.environ[str(key)] = str(value)
             variables = VarsGenerator(
                 defaults_path="server_autoconfig/defaults.yaml",
                 varlist_conf_path="server_autoconfig/varlist_conf.yaml",
-                model_def_settings_path=("server_autoconfig/settings_vllm.csv"
-                                         )).calculate_variables()
+                model_def_settings_path=(
+                    "server_autoconfig/settings_vllm.csv"),
+            ).calculate_variables()
             ScriptGenerator(
                 template_script_path="templates/template_vllm_server.sh",
                 output_script_path="vllm_server.sh",
                 variables=variables,
-                log_dir="logs").create_and_run()
+                log_dir="logs",
+            ).create_and_run()
         elif self.mode == "benchmark":
             print("[INFO] Starting container in benchmark mode.")
+            self._update_benchmark_envs_from_user_vars()
             ScriptGenerator(
                 template_script_path="templates/template_vllm_benchmark.sh",
                 output_script_path="vllm_benchmark.sh",
                 variables=self.config_envs,
-                log_dir="logs").create_and_run()
+                log_dir="logs",
+            ).create_and_run()
         elif self.mode == "test":
             print("[INFO] Test mode: keeping container active. "
                   "Press Ctrl+C to exit.")
@@ -102,17 +208,21 @@ def run(self):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="EntrypointMain for vllm docker")
-    parser.add_argument("mode",
-                        nargs="?",
-                        default="server",
-                        choices=["server", "benchmark", "test"],
-                        help="Mode to run: server, benchmark, or test")
+    parser.add_argument(
+        "mode",
+        nargs="?",
+        default="server",
+        choices=["server", "benchmark", "test"],
+        help="Mode to run: server, benchmark, or test",
+    )
     parser.add_argument("--config-file", type=str, help="Path to config file")
     parser.add_argument("--config-name",
                         type=str,
                         help="Config name in the config file")
     args = parser.parse_args()
 
-    EntrypointMain(mode=args.mode,
-                   config_file=args.config_file,
-                   config_name=args.config_name).run()
+    EntrypointMain(
+        mode=args.mode,
+        config_file=args.config_file,
+        config_name=args.config_name,
+    ).run()
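
The reworked run() establishes an explicit resolution order: the named config section supplies MODEL first, a MODEL exported in the environment overrides it for the purpose of choosing defaults, the matching defaults_* section is layered in, and the config-file section is applied last so its explicit values win over the defaults (benchmark mode then applies the user_vars overrides before generating the script). A condensed sketch of that ordering, with hypothetical load_section/load_defaults callables standing in for the class methods:

# Condensed sketch of the precedence implemented in run(); load_section() and
# load_defaults(model) are stand-ins for _load_env_from_config_file() and
# _load_env_from_defaults() and are assumed to return plain dicts.
import os

def resolve_config(load_section, load_defaults) -> dict:
    config_envs: dict = {}

    model_conf = load_section() or {}          # 1. named section from --config-file/--config-name
    if "MODEL" in model_conf:
        config_envs["MODEL"] = model_conf["MODEL"]

    if os.environ.get("MODEL"):                # 2. MODEL from the environment steers defaults selection
        config_envs["MODEL"] = os.environ["MODEL"]

    if not config_envs.get("MODEL"):           # 3. fail fast if no model was resolved
        raise SystemExit("MODEL is not set")

    config_envs.update(load_defaults(config_envs["MODEL"]))  # 4. defaults for that model
    config_envs.update(model_conf)             # 5. explicit config values win over defaults
    return config_envs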

0 commit comments
