Skip to content

Commit 223a619

Browse files
authored
When evals fail, exit with status code 1 (#61)
2 parents f5988c3 + 04d7f6c commit 223a619

File tree

2 files changed

+17
-6
lines changed

2 files changed

+17
-6
lines changed

cmd/eval/eval.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ type EvaluationResult struct {
4848
Details string `json:"details,omitempty"`
4949
}
5050

51+
// FailedTests is the sentinel error returned by runEvaluation when the number
// of passed tests is lower than the total number of tests. The eval command's
// run function checks for it and sets cmd.SilenceUsage so that Cobra does not
// print the usage/help text for what is a failed evaluation rather than a
// misuse of the command; the error is still returned to the caller.
//
// NOTE(review): the message appears intended for direct display to the user,
// which is presumably why it does not follow the usual lowercase,
// no-punctuation error-string convention — confirm before normalizing it.
// NOTE(review): Go convention would name this ErrFailedTests and match it with
// errors.Is; the name is kept because existing callers reference FailedTests.
var FailedTests = errors.New("❌ Some tests failed.")
52+
5153
// NewEvalCommand returns a new command to evaluate prompts against models
5254
func NewEvalCommand(cfg *command.Config) *cobra.Command {
5355
cmd := &cobra.Command{
@@ -106,7 +108,14 @@ func NewEvalCommand(cfg *command.Config) *cobra.Command {
106108
jsonOutput: jsonOutput,
107109
}
108110

109-
return handler.runEvaluation(cmd.Context())
111+
err = handler.runEvaluation(cmd.Context())
112+
if err == FailedTests {
113+
// Cobra by default will show the help message when an error occurs,
114+
// which is not what we want for failed evaluations.
115+
// Instead, we just want to exit with a non-zero code.
116+
cmd.SilenceUsage = true
117+
}
118+
return err
110119
},
111120
}
112121

@@ -206,6 +215,10 @@ func (h *evalCommandHandler) runEvaluation(ctx context.Context) error {
206215
h.printSummary(passedTests, totalTests, passRate)
207216
}
208217

218+
if totalTests-passedTests > 0 {
219+
return FailedTests
220+
}
221+
209222
return nil
210223
}
211224

@@ -249,8 +262,6 @@ func (h *evalCommandHandler) printSummary(passedTests, totalTests int, passRate
249262

250263
if passedTests == totalTests {
251264
h.cfg.WriteToOut("🎉 All tests passed!\n")
252-
} else {
253-
h.cfg.WriteToOut("❌ Some tests failed.\n")
254265
}
255266
}
256267

cmd/eval/eval_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ evaluators:
306306
cmd.SetArgs([]string{promptFile})
307307

308308
err = cmd.Execute()
309-
require.NoError(t, err)
309+
require.ErrorIs(t, err, FailedTests)
310310

311311
output := out.String()
312312
require.Contains(t, output, "Failing Test")
@@ -376,7 +376,7 @@ evaluators:
376376
cmd.SetArgs([]string{"--json", promptFile})
377377

378378
err = cmd.Execute()
379-
require.NoError(t, err)
379+
require.ErrorIs(t, err, FailedTests)
380380

381381
output := out.String()
382382

@@ -549,7 +549,7 @@ evaluators:
549549
cmd.SetArgs([]string{"--json", promptFile})
550550

551551
err = cmd.Execute()
552-
require.NoError(t, err)
552+
require.ErrorIs(t, err, FailedTests)
553553

554554
output := out.String()
555555

0 commit comments

Comments
 (0)