[Feature] Make dump-eval-details default behavior (open-compass#1999)

MaiziXiao · web-flow · commit a05f9da134e9 · 2025-04-08T14:42:26.000+08:00
* Update

* update

* update
diff --git a/docs/en/user_guides/experimentation.md b/docs/en/user_guides/experimentation.md
@@ -57,7 +57,7 @@ The parameter explanation is as follows:
 - `-w`: Specify the working path, default is `./outputs/default`.
 - `-l`: Enable status reporting via Lark bot.
 - `--dry-run`: When enabled, inference and evaluation tasks will be dispatched but won't actually run for debugging.
-- `--dump-eval-details`: When enabled，evaluation under the `results` folder will include more details, such as the correctness of each sample.
+- `--dump-eval-details`: Enabled by default. Evaluation results under the `results` folder will include more details, such as the correctness of each sample. Set `--dump-eval-details False` to disable it.
 
 Using run mode `-m all` as an example, the overall execution flow is as follows:
 
diff --git a/docs/zh_cn/user_guides/experimentation.md b/docs/zh_cn/user_guides/experimentation.md
@@ -57,7 +57,7 @@ python run.py $EXP {--slurm | --dlc | None} [-p PARTITION] [-q QUOTATYPE] [--deb
 - `-w`: 指定工作路径，默认为 `./outputs/default`
 - `-l`: 打开飞书机器人状态上报。
 - `--dry-run`: 开启时，推理和评测任务仅会分发但不会真正运行，便于调试；
-- `--dump-eval-details`: 开启时，`results` 下的评测结果中将会包含更加详细的评测结果信息，例如每条样本是否正确等。
+- `--dump-eval-details`: 默认开启，`results` 下的评测结果中将会包含更加详细的评测结果信息，例如每条样本是否正确等。如不需要开启，需设置 `--dump-eval-details False`。
 
 以运行模式 `-m all` 为例，整体运行流如下：
 
diff --git a/opencompass/cli/main.py b/opencompass/cli/main.py
@@ -119,8 +119,11 @@ def parse_args():
     parser.add_argument(
         '--dump-eval-details',
         help='Whether to dump the evaluation details, including the '
-        'correctness of each sample, bpb, etc.',
-        action='store_true',
+        'correctness of each sample, bpb, etc. Defaults to True.',
+        nargs='?',
+        const=True,
+        default=True,
+        type=lambda x: False if x and x.lower() == 'false' else True
     )
     parser.add_argument(
         '--dump-extract-rate',
@@ -233,7 +236,6 @@ def parse_custom_dataset_args(custom_dataset_parser):
 
 def main():
     args = parse_args()
-
     if args.num_gpus is not None:
         raise ValueError('The `--num-gpus` argument is deprecated, please use '
                          '`--hf-num-gpus` to describe number of gpus used for '
@@ -350,6 +352,9 @@ def main():
         if args.dlc or args.slurm or cfg.get('eval', None) is None:
             fill_eval_cfg(cfg, args)
         if args.dump_eval_details:
+            logger.warning('Default to dump eval details, it might take extra'
+                        'space to save all the evaluation details. '
+                        'Set --dump-eval-details False to skip the details dump')
             cfg.eval.runner.task.dump_details = True
         if args.dump_extract_rate:
             cfg.eval.runner.task.cal_extract_rate = True