From 2d4989fc89089b64b4e245615fbbf00d72dc4ce4 Mon Sep 17 00:00:00 2001
From: Jaime Adan Cuevas Ramirez
Date: Wed, 2 Apr 2025 16:17:10 -0600
Subject: [PATCH] Create refactoring_of_benchmarks.py

Error handling (invalid JSON, missing files)
Automatic fixes (correct dataset paths if names mismatch)
Clear logging with warnings and actions
---
 refactoring_of_benchmarks.py | 64 ++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 refactoring_of_benchmarks.py

diff --git a/refactoring_of_benchmarks.py b/refactoring_of_benchmarks.py
new file mode 100644
index 00000000..9db4ec96
--- /dev/null
+++ b/refactoring_of_benchmarks.py
@@ -0,0 +1,64 @@
+import os
+import json
+
+# Configuration
+CONFIG_FILE = "xgb_cpu_main_config.json"
+DATASET_FOLDER = "dataset"
+EXPECTED_DATASETS = ["mlsr", "mortgage1Q", "plasticc", "santander"]
+
+def load_config():
+    """Load the benchmark configuration file."""
+    if not os.path.exists(CONFIG_FILE):
+        print(f"ERROR: Configuration file '{CONFIG_FILE}' not found. Verify its location.")
+        return None
+
+    with open(CONFIG_FILE, "r") as f:
+        try:
+            return json.load(f)
+        except json.JSONDecodeError:
+            print(f"ERROR: Failed to parse '{CONFIG_FILE}'. Ensure it contains valid JSON.")
+            return None
+
+def check_datasets():
+    """Check if required datasets exist in the dataset folder."""
+    missing_datasets = []
+    for dataset in EXPECTED_DATASETS:
+        dataset_path = os.path.join(DATASET_FOLDER, dataset)
+        if not os.path.exists(dataset_path):
+            print(f"āš ļø WARNING: Dataset '{dataset}' is missing in '{DATASET_FOLDER}'.")
+            missing_datasets.append(dataset)
+
+    if missing_datasets:
+        print("\nšŸ”¹ Suggested Actions:")
+        print("- Ensure dataset names are correct in the 'dataset/' folder.")
+        print("- Download the missing datasets if necessary.")
+        print("- If dataset names differ, update 'xgb_cpu_main_config.json'.\n")
+
+    return missing_datasets
+
+def update_config(missing_datasets):
+    """Fix dataset names in the configuration file if necessary."""
+    config = load_config()
+    if not config:
+        return
+
+    updated = False
+    for dataset in missing_datasets:
+        if dataset in config.get("datasets", {}):
+            print(f"šŸ› ļø Fixing dataset path for '{dataset}' in {CONFIG_FILE}...")
+            config["datasets"][dataset] = os.path.join(DATASET_FOLDER, f"{dataset}.csv")  # Adjust extension if necessary
+            updated = True
+
+    if updated:
+        with open(CONFIG_FILE, "w") as f:
+            json.dump(config, f, indent=4)
+        print(f"āœ… {CONFIG_FILE} has been updated with corrected dataset paths.")
+
+if __name__ == "__main__":
+    print("šŸ” Checking dataset availability...\n")
+    missing = check_datasets()
+
+    if missing:
+        update_config(missing)
+    else:
+        print("āœ… All datasets are present. You can proceed with benchmarking.")
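
For reference, update_config() assumes that xgb_cpu_main_config.json contains a top-level "datasets" object mapping each expected dataset name to its on-disk path. The upstream config may be structured differently; the snippet below is only a minimal sketch of the shape this script expects, with hypothetical placeholder paths, useful for trying the checker locally.

    # Minimal sketch of the config shape assumed by load_config()/update_config():
    # a top-level "datasets" object mapping each expected dataset name to a path.
    # The paths below are hypothetical placeholders, not upstream defaults.
    import json

    example_config = {
        "datasets": {
            "mlsr": "dataset/mlsr.csv",
            "mortgage1Q": "dataset/mortgage1Q.csv",
            "plasticc": "dataset/plasticc.csv",
            "santander": "dataset/santander.csv",
        }
    }

    # Write the sketch to the file name the script looks for.
    with open("xgb_cpu_main_config.json", "w") as f:
        json.dump(example_config, f, indent=4)

With such a file in place, running `python refactoring_of_benchmarks.py` prints a warning for each dataset missing from the 'dataset/' folder and, if any are missing, rewrites the matching entries under "datasets" to point at '<dataset>.csv' inside that folder.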