
Commit ad7d3b6

docker vllm: add new configs (#1506)
docker vllm: add new configs

Signed-off-by: Tomasz Thaddey <[email protected]>
1 parent f865175 commit ad7d3b6

File tree

11 files changed: +334 -107 lines changed

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+deepSeek-R1-Distill-Llama-70B:
+  MODEL: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
+
+llama31_8b_instruct:
+  MODEL: meta-llama/Llama-3.1-8B-Instruct
+
+llama31_70b_instruct:
+  MODEL: meta-llama/Llama-3.1-70B-Instruct
+
+llama31_405b_instruct:
+  MODEL: meta-llama/Llama-3.1-405B-Instruct
+
+llama32_1b_instruct:
+  MODEL: meta-llama/Llama-3.2-1B-Instruct
+
+llama32_3b_instruct:
+  MODEL: meta-llama/Llama-3.2-3B-Instruct
+
+llama33_70b_instruct:
+  MODEL: meta-llama/Llama-3.3-70B-Instruct
+
+mistral_7b_instruct_v02:
+  MODEL: mistralai/Mistral-7B-Instruct-v0.2
+
+mixtral_8x22b_instruct_v01:
+  MODEL: mistralai/Mixtral-8x22B-Instruct-v0.1
+
+mixtral_8x7b_instruct_v01:
+  MODEL: mistralai/Mixtral-8x7B-Instruct-v0.1
+
+qwen25_14b_instruct:
+  MODEL: Qwen/Qwen2.5-14B-Instruct
+
+qwen25_32b_instruct:
+  MODEL: Qwen/Qwen2.5-32B-Instruct
+
+qwen25_72b_instruct:
+  MODEL: Qwen/Qwen2.5-72B-Instruct
+
+qwen25_7b_instruct:
+  MODEL: Qwen/Qwen2.5-7B-Instruct
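
Each top-level key in this new file is a named configuration that the entrypoint can be pointed at via --config-name (wired through VLLM_BENCHMARK_CONFIG_NAME in docker-compose.yml below). A minimal sketch of how such a section resolves to its MODEL value; the file name model_configs.yaml is a placeholder used only for illustration, not the actual path in the repository:

# Minimal sketch: resolve a named section from a config file shaped like the one above.
# "model_configs.yaml" is a hypothetical path for illustration.
import yaml

def resolve_model(config_path: str, config_name: str) -> str:
    with open(config_path) as f:
        config = yaml.safe_load(f)
    section = config.get(config_name)
    if not isinstance(section, dict) or "MODEL" not in section:
        raise ValueError(f"Section '{config_name}' missing or has no MODEL key")
    return section["MODEL"]

# Example: resolve_model("model_configs.yaml", "qwen25_7b_instruct")
# would return "Qwen/Qwen2.5-7B-Instruct".
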
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+llama32-11B-Vision-Instruct:
+  MODEL: meta-llama/Llama-3.2-11B-Vision-Instruct
+
+llama32-90B-Vision-Instruct:
+  MODEL: meta-llama/Llama-3.2-90B-Vision-Instruct

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+defaults_text:
+  MODELS:
+    - deepseek-ai/DeepSeek-R1-Distill-Llama-70B
+    - meta-llama/Llama-3.1-70B-Instruct
+    - meta-llama/Llama-3.1-405B-Instruct
+    - meta-llama/Llama-3.1-8B-Instruct
+    - meta-llama/Llama-3.2-1B-Instruct
+    - meta-llama/Llama-3.2-3B-Instruct
+    - meta-llama/Llama-3.3-70B-Instruct
+    - mistralai/Mistral-7B-Instruct-v0.2
+    - mistralai/Mixtral-8x22B-Instruct-v0.1
+    - mistralai/Mixtral-8x7B-Instruct-v0.1
+    - Qwen/Qwen2.5-14B-Instruct
+    - Qwen/Qwen2.5-32B-Instruct
+    - Qwen/Qwen2.5-72B-Instruct
+    - Qwen/Qwen2.5-7B-Instruct
+  DATASET: /workspace/vllm/benchmarks/sonnet.txt
+  DATASET_NAME: sonnet
+  BACKEND: vllm
+  INPUT_TOK: 2048
+  OUTPUT_TOK: 2048
+  CONCURRENT_REQ: 64
+  NUM_PROMPTS: 640
+  MAX_MODEL_LEN: 4352
+  PREFIX_LEN: 100
+
+defaults_vision:
+  MODELS:
+    - meta-llama/Llama-3.2-11B-Vision-Instruct
+    - meta-llama/Llama-3.2-90B-Vision-Instruct
+  DATASET: lmarena-ai/vision-arena-bench-v0.1
+  DATASET_NAME: hf
+  BACKEND: openai-chat
+  CONCURRENT_REQ: 64
+  NUM_PROMPTS: 500
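
These defaults_* sections group models with the benchmark settings they share; the entrypoint picks the section whose MODELS list contains the selected model and copies the remaining keys in as defaults. A simplified, standalone sketch of that matching (it returns on the first match, whereas the real _load_env_from_defaults keeps scanning all sections):

# Simplified sketch of defaults matching: pick the defaults_* section whose
# MODELS list contains the chosen model and return its other keys as defaults.
import yaml

def match_defaults(defaults_path: str, model: str) -> dict:
    with open(defaults_path) as f:
        config = yaml.safe_load(f) or {}
    for name, section in config.items():
        if name.startswith("defaults_") and isinstance(section, dict):
            if model in section.get("MODELS", []):
                return {k: v for k, v in section.items() if k != "MODELS"}
    return {}

# Example: match_defaults("benchmark_configurations/defaults.yaml",
#                         "Qwen/Qwen2.5-7B-Instruct")
# would return the defaults_text settings (DATASET, BACKEND, INPUT_TOK, ...).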

.cd/benchmark_configurations/llama8b.yaml

Lines changed: 0 additions & 27 deletions
This file was deleted.
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+variables:
+  - MODEL
+  - INPUT_TOK
+  - OUTPUT_TOK
+  - CON_REQ
+  - NUM_PROMPTS
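
This variables list names the settings a user may override from the shell at benchmark time: any listed variable that is set in the environment replaces the value loaded from the config. A rough sketch of that override step, reusing the path the entrypoint reads:

# Rough sketch: apply user overrides from the environment for the variables
# whitelisted in user_vars.yaml (environment values arrive as strings).
import os
import yaml

def apply_user_overrides(
        config_envs: dict,
        user_vars_path: str = "benchmark_configurations/user_vars.yaml") -> dict:
    with open(user_vars_path) as f:
        user_vars = yaml.safe_load(f) or {}
    for var in user_vars.get("variables", []):
        if var in os.environ:
            config_envs[var] = os.environ[var]  # environment wins for whitelisted keys
    return config_envs

# Example: with INPUT_TOK=1024 exported, apply_user_overrides({"INPUT_TOK": 2048})
# returns {"INPUT_TOK": "1024"}.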

.cd/docker-compose.yml

Lines changed: 16 additions & 1 deletion
@@ -2,10 +2,20 @@ services:
   vllm-server:
     image: ${DOCKER_IMAGE}
     environment:
+      - MODEL
       - HF_HOME=/mnt/hf_cache
-      - HF_TOKEN=${HF_TOKEN}
+      - HF_TOKEN
       - HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES:-all}
       - PYTHONUNBUFFERED=1
+      - PT_HPU_LAZY_MODE
+      - VLLM_DECODE_BLOCK_BUCKET_STEP
+      - VLLM_DECODE_BS_BUCKET_STEP
+      - VLLM_PROMPT_BS_BUCKET_STEP
+      - VLLM_PROMPT_SEQ_BUCKET_STEP
+      - VLLM_SKIP_WARMUP
+      - MAX_MODEL_LEN
+      - MAX_NUM_SEQS
+      - TENSOR_PARALLEL_SIZE
     volumes:
       - /mnt/hf_cache:/mnt/hf_cache
     ports:
@@ -31,6 +41,11 @@ services:
       vllm-server:
         condition: service_healthy
     environment:
+      - MODEL
       - HF_TOKEN=${HF_TOKEN}
       - PYTHONUNBUFFERED=1
+      - INPUT_TOK
+      - OUTPUT_TOK
+      - CON_REQ
+      - NUM_PROMPTS
     command: ["benchmark", "--config-file", "${VLLM_BENCHMARK_CONFIG_FILE}", "--config-name", "${VLLM_BENCHMARK_CONFIG_NAME}"]

.cd/entrypoints/entrypoint_main.py

Lines changed: 142 additions & 32 deletions
@@ -29,60 +29,166 @@ def __init__(self, mode="server", config_file=None, config_name=None):
                   file=sys.stderr)
             sys.exit(1)
 
-    def _load_env_from_config(self):
+    def _load_env_from_defaults(self):
+        """
+        Loads default environment variables from a YAML file based on the mode.
+        For each section starting with 'defaults_', if model is in the section's
+        'MODELS' list, loads the environment variables from that section.
+        If no section matches, loads nothing.
+        If the file does not exist, it returns an empty dictionary.
+        """
+        defaults_file = ("server_configurations/defaults.yaml"
+                         if self.mode == "server" else
+                         "benchmark_configurations/defaults.yaml")
+        try:
+            with open(defaults_file) as f:
+                config = yaml.safe_load(f)
+            found = False
+            for section_name, section in config.items():
+                if section_name.startswith("defaults_") and isinstance(
+                        section, dict):
+                    models = section.get("MODELS", [])
+                    if (isinstance(models, list)
+                            and self.config_envs.get("MODEL") in models):
+                        env_vars = {
+                            k: v
+                            for k, v in section.items() if k != "MODELS"
+                        }
+                        self.config_envs.update(env_vars)
+                        print(
+                            f"[INFO] Loaded default configuration section "
+                            f"'{section_name}' for model "
+                            f"'{self.config_envs.get('MODEL')}' from file: "
+                            f"{defaults_file}")
+                        for key, value in env_vars.items():
+                            print(f" {key}: {value}")
+                        found = True
+            if not found:
+                print(f"[WARNING] No defaults section found for model "
+                      f"'{self.config_envs.get('MODEL')}' in "
+                      f"'{defaults_file}'.")
+        except FileNotFoundError:
+            print(f"[WARNING] Defaults file '{defaults_file}' not found. "
+                  "No defaults loaded.")
+        except Exception as e:
+            print(
+                f"[ERROR] Failed to load defaults: {e}",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+
+    def _load_env_from_config_file(self):
+        """
+        Loads a specific configuration section from a YAML file and updates the
+        current environment configuration with the values from that section.
+        If a key already exists (e.g., from defaults), it will be overwritten
+        by the value from the file. Exits the program with an error message if
+        the section is missing or invalid, or if the file cannot be read.
+
+        Raises:
+            SystemExit: If the configuration file or section is missing,
+                invalid, or cannot be loaded.
+        """
         try:
             with open(self.config_file) as f:
                 config = yaml.safe_load(f)
             section = config.get(self.config_name)
-            if section is None:
-                print(
-                    f"[ERROR] Section '{self.config_name}' not found in "
-                    f"'{self.config_file}'.",
-                    file=sys.stderr)
-                sys.exit(1)
-            if not isinstance(section, dict):
+            if section is None or not isinstance(section, dict):
                 print(
-                    f"[ERROR] Section '{self.config_name}' is not a "
-                    f"dictionary in '{self.config_file}'.",
-                    file=sys.stderr)
+                    f"[ERROR] Section '{self.config_name}' not found or "
+                    f"is not a dictionary in '{self.config_file}'.",
+                    file=sys.stderr,
+                )
                 sys.exit(1)
-            self.config_envs = section
-            print(f"[INFO] Loaded configuration from file: "
-                  f"{self.config_file}, section: {self.config_name}")
-            print("[INFO] The following parameters and values were loaded "
-                  "from the config file:")
-            for key, value in self.config_envs.items():
+            print(f"[INFO] Loaded configuration section "
+                  f"'{self.config_name}' from file: {self.config_file}")
+            for key, value in section.items():
                 print(f" {key}: {value}")
+            return section
         except Exception as e:
-            print(f"[ERROR] Failed to load config: {e}", file=sys.stderr)
+            print(
+                f"[ERROR] Failed to load config: {e}",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+
+    def _update_benchmark_envs_from_user_vars(self):
+        """
+        Loads a list of variable names from a YAML file and, for each variable
+        present in the current environment, updates the internal configuration
+        dictionary with the environment value. If the YAML file is missing or
+        empty, no variables are updated.
+        """
+        user_vars_file = "benchmark_configurations/user_vars.yaml"
+        try:
+            with open(user_vars_file) as f:
+                user_vars = yaml.safe_load(f)
+            if user_vars and isinstance(user_vars, dict):
+                variables = user_vars.get("variables", [])
+                for var in variables:
+                    if var in os.environ:
+                        self.config_envs[var] = os.environ[var]
+                        print(f"[INFO] Overwriting {var} with value from "
+                              f"environment: {self.config_envs[var]}")
+            else:
+                print(f"[WARNING] No user-defined variables found in "
+                      f"'{user_vars_file}'.")
+        except FileNotFoundError:
+            print(
+                f"[WARNING] User variables file '{user_vars_file}' not found. "
+                "No user-defined variables loaded.")
+        except Exception as e:
+            print(
+                f"[ERROR] Failed to load user-defined variables: {e}",
+                file=sys.stderr,
+            )
             sys.exit(1)
 
     def run(self):
+        model_conf = {}
         if self.config_file and self.config_name:
-            self._load_env_from_config()
+            model_conf = self._load_env_from_config_file()
+            if "MODEL" in model_conf:
+                self.config_envs["MODEL"] = model_conf["MODEL"]
+
+        env_model = os.environ.get("MODEL")
+        if env_model:
+            self.config_envs["MODEL"] = env_model
+
+        if not self.config_envs.get("MODEL"):
+            print("[ERROR] MODEL is not set. Exiting.", file=sys.stderr)
+            sys.exit(1)
+
+        self._load_env_from_defaults()
+
+        if model_conf:
+            self.config_envs.update(model_conf)
 
         if self.mode == "server":
             print("[INFO] Starting container in server mode.")
-            # VarsGenerator will read variables from the environment
             for key, value in self.config_envs.items():
                 os.environ[str(key)] = str(value)
             variables = VarsGenerator(
                 defaults_path="server_autoconfig/defaults.yaml",
                 varlist_conf_path="server_autoconfig/varlist_conf.yaml",
-                model_def_settings_path=("server_autoconfig/settings_vllm.csv"
-                                         )).calculate_variables()
+                model_def_settings_path=(
+                    "server_autoconfig/settings_vllm.csv"),
+            ).calculate_variables()
             ScriptGenerator(
                 template_script_path="templates/template_vllm_server.sh",
                 output_script_path="vllm_server.sh",
                 variables=variables,
-                log_dir="logs").create_and_run()
+                log_dir="logs",
+            ).create_and_run()
         elif self.mode == "benchmark":
             print("[INFO] Starting container in benchmark mode.")
+            self._update_benchmark_envs_from_user_vars()
             ScriptGenerator(
                 template_script_path="templates/template_vllm_benchmark.sh",
                 output_script_path="vllm_benchmark.sh",
                 variables=self.config_envs,
-                log_dir="logs").create_and_run()
+                log_dir="logs",
+            ).create_and_run()
         elif self.mode == "test":
             print("[INFO] Test mode: keeping container active. "
                   "Press Ctrl+C to exit.")
@@ -102,17 +208,21 @@ def run(self):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="EntrypointMain for vllm docker")
-    parser.add_argument("mode",
-                        nargs="?",
-                        default="server",
-                        choices=["server", "benchmark", "test"],
-                        help="Mode to run: server, benchmark, or test")
+    parser.add_argument(
+        "mode",
+        nargs="?",
+        default="server",
+        choices=["server", "benchmark", "test"],
+        help="Mode to run: server, benchmark, or test",
+    )
     parser.add_argument("--config-file", type=str, help="Path to config file")
     parser.add_argument("--config-name",
                         type=str,
                         help="Config name in the config file")
     args = parser.parse_args()
 
-    EntrypointMain(mode=args.mode,
-                   config_file=args.config_file,
-                   config_name=args.config_name).run()
+    EntrypointMain(
+        mode=args.mode,
+        config_file=args.config_file,
+        config_name=args.config_name,
+    ).run()
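
The reworked run() establishes an explicit resolution order: the named config section supplies MODEL first, a MODEL exported in the environment overrides it for the purpose of choosing defaults, the matching defaults_* section is layered in, and the config-file section is applied last so its explicit values win over the defaults (benchmark mode then applies the user_vars overrides before generating the script). A condensed sketch of that ordering, with hypothetical load_section/load_defaults callables standing in for the class methods:

# Condensed sketch of the precedence implemented in run(); load_section() and
# load_defaults(model) are stand-ins for _load_env_from_config_file() and
# _load_env_from_defaults() and are assumed to return plain dicts.
import os

def resolve_config(load_section, load_defaults) -> dict:
    config_envs: dict = {}

    model_conf = load_section() or {}          # 1. named section from --config-file/--config-name
    if "MODEL" in model_conf:
        config_envs["MODEL"] = model_conf["MODEL"]

    if os.environ.get("MODEL"):                # 2. MODEL from the environment steers defaults selection
        config_envs["MODEL"] = os.environ["MODEL"]

    if not config_envs.get("MODEL"):           # 3. fail fast if no model was resolved
        raise SystemExit("MODEL is not set")

    config_envs.update(load_defaults(config_envs["MODEL"]))  # 4. defaults for that model
    config_envs.update(model_conf)             # 5. explicit config values win over defaults
    return config_envs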

0 commit comments
