Skip to content

Commit ce0e788

Browse files
authored
improve tonumber/0 performance by parsing input as number literal
Previously, the tonumber/0 filter parsed the input as a JSON value, which is slow on large non-number strings. Parsing the input string as a number literal fixes the performance issue. This change also makes the filter reject number strings that contain whitespace.
1 parent 913b264 commit ce0e788

File tree

2 files changed

+19
-6
lines changed

2 files changed

+19
-6
lines changed

src/builtin.c

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ void *alloca (size_t);
4343
#include "locfile.h"
4444
#include "jv_unicode.h"
4545
#include "jv_alloc.h"
46+
#include "jv_dtoa.h"
47+
#include "jv_dtoa_tsd.h"
4648
#include "jv_private.h"
4749
#include "util.h"
4850

@@ -464,11 +466,22 @@ static jv f_tonumber(jq_state *jq, jv input) {
464466
return input;
465467
}
466468
if (jv_get_kind(input) == JV_KIND_STRING) {
467-
jv parsed = jv_parse(jv_string_value(input));
468-
if (!jv_is_valid(parsed) || jv_get_kind(parsed) == JV_KIND_NUMBER) {
469-
jv_free(input);
470-
return parsed;
469+
const char* s = jv_string_value(input);
470+
#ifdef USE_DECNUM
471+
jv number = jv_number_with_literal(s);
472+
if (jv_get_kind(number) == JV_KIND_INVALID) {
473+
return type_error(input, "cannot be parsed as a number");
474+
}
475+
#else
476+
char *end = 0;
477+
double d = jvp_strtod(tsd_dtoa_context_get(), s, &end);
478+
if (end == 0 || *end != 0) {
479+
return type_error(input, "cannot be parsed as a number");
471480
}
481+
jv number = jv_number(d);
482+
#endif
483+
jv_free(input);
484+
return number;
472485
}
473486
return type_error(input, "cannot be parsed as a number");
474487
}

tests/jq.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2000,8 +2000,8 @@ null
20002000
2
20012001

20022002
.[] |= try tonumber
2003-
["1", "2a", "3", 4]
2004-
[1, 3, 4]
2003+
["1", "2a", "3", " 4 ", "5.67", ".89", "-876", "+5.43", 21]
2004+
[1, 3, 5.67, 0.89, -876, 5.43, 21]
20052005

20062006
# Also 1859, but from 2073
20072007
any(keys[]|tostring?;true)

0 commit comments

Comments
 (0)