Skip to content

Commit bd8357f

Browse files
Fletcher Gornick (fmgornick)
authored and committed
feat: uri decode function
1 parent 137018d commit bd8357f

File tree

9 files changed

+139
-8
lines changed

9 files changed

+139
-8
lines changed

Makefile.am

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ endif
141141

142142
### Tests (make check)
143143

144-
TESTS = tests/mantest tests/jqtest tests/shtest tests/utf8test tests/base64test
144+
TESTS = tests/mantest tests/jqtest tests/shtest tests/utf8test tests/base64test tests/uritest
145145
if !WIN32
146146
TESTS += tests/optionaltest
147147
endif
@@ -218,7 +218,6 @@ EXTRA_DIST = $(DOC_FILES) $(man_MANS) $(TESTS) $(TEST_LOG_COMPILER) \
218218
jq.1.prebuilt jq.spec src/lexer.c src/lexer.h src/parser.c \
219219
src/parser.h src/version.h src/builtin.jq scripts/version \
220220
libjq.pc \
221-
tests/base64.test tests/jq-f-test.sh tests/jq.test \
222221
tests/modules/a.jq tests/modules/b/b.jq tests/modules/c/c.jq \
223222
tests/modules/c/d.jq tests/modules/data.json \
224223
tests/modules/home1/.jq tests/modules/home2/.jq/g.jq \
@@ -232,7 +231,7 @@ EXTRA_DIST = $(DOC_FILES) $(man_MANS) $(TESTS) $(TEST_LOG_COMPILER) \
232231
tests/onig.supp tests/local.supp \
233232
tests/setup tests/torture/input0.json \
234233
tests/optional.test tests/man.test tests/manonig.test \
235-
tests/jq.test tests/onig.test tests/base64.test \
234+
tests/jq.test tests/onig.test tests/base64.test tests/uri.test \
236235
tests/utf8-truncate.jq tests/jq-f-test.sh \
237236
tests/no-main-program.jq tests/yes-main-program.jq
238237

docs/content/manual/dev/manual.yml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1300,7 +1300,7 @@ sections:
13001300
input: '[1,[[],{"a":2}]]'
13011301
output: ['[[0],[1,1,"a"]]']
13021302

1303-
- title: "`add`"
1303+
- title: "`add`, `add(generator)`"
13041304
body: |
13051305
13061306
The filter `add` takes as input an array, and produces as
@@ -1311,6 +1311,9 @@ sections:
13111311
13121312
If the input is an empty array, `add` returns `null`.
13131313
1314+
`add(generator)` operates on the given generator rather than
1315+
the input.
1316+
13141317
examples:
13151318
- program: add
13161319
input: '["a","b","c"]'
@@ -1321,6 +1324,9 @@ sections:
13211324
- program: add
13221325
input: '[]'
13231326
output: ["null"]
1327+
- program: add(.[].a)
1328+
input: '[{"a":3}, {"a":5}, {"b":6}]'
1329+
output: ['8']
13241330

13251331
- title: "`any`, `any(condition)`, `any(generator; condition)`"
13261332
body: |
@@ -2135,6 +2141,11 @@ sections:
21352141
Applies percent-encoding, by mapping all reserved URI
21362142
characters to a `%XX` sequence.
21372143
2144+
* `@urid`:
2145+
2146+
The inverse of `@uri`: applies percent-decoding by mapping each
2147+
`%XX` sequence back to the byte it encodes (the result must be valid UTF-8).
2148+
21382149
* `@csv`:
21392150
21402151
The input must be an array, and it is rendered as CSV

jq.1.prebuilt

Lines changed: 15 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/builtin.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,48 @@ static jv f_format(jq_state *jq, jv input, jv fmt) {
657657
}
658658
jv_free(input);
659659
return line;
660+
} else if (!strcmp(fmt_s, "urid")) {
661+
jv_free(fmt);
662+
input = f_tostring(jq, input);
663+
664+
jv line = jv_string("");
665+
const char *errmsg = "is not a valid uri encoding";
666+
const char *s = jv_string_value(input);
667+
while (*s) {
668+
if (*s != '%') {
669+
line = jv_string_append_buf(line, s++, 1);
670+
} else {
671+
unsigned char unicode[4] = {0};
672+
int b = 0;
673+
// check leading bits of first octet to determine length of unicode character
674+
// (https://datatracker.ietf.org/doc/html/rfc3629#section-3)
675+
while (b == 0 || (b < 4 && unicode[0] >> 7 & 1 && unicode[0] >> (7-b) & 1)) {
676+
if (*(s++) != '%') {
677+
jv_free(line);
678+
return type_error(input, errmsg);
679+
}
680+
for (int i=0; i<2; i++) {
681+
unicode[b] <<= 4;
682+
char c = *(s++);
683+
if ('0' <= c && c <= '9') unicode[b] |= c - '0';
684+
else if ('a' <= c && c <= 'f') unicode[b] |= c - 'a' + 10;
685+
else if ('A' <= c && c <= 'F') unicode[b] |= c - 'A' + 10;
686+
else {
687+
jv_free(line);
688+
return type_error(input, errmsg);
689+
}
690+
}
691+
b++;
692+
}
693+
if (!jvp_utf8_is_valid((const char *)unicode, (const char *)unicode+b)) {
694+
jv_free(line);
695+
return type_error(input, errmsg);
696+
}
697+
line = jv_string_append_buf(line, (const char *)unicode, b);
698+
}
699+
}
700+
jv_free(input);
701+
return line;
660702
} else if (!strcmp(fmt_s, "sh")) {
661703
jv_free(fmt);
662704
if (jv_get_kind(input) != JV_KIND_ARRAY)

src/builtin.jq

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ def unique: group_by(.) | map(.[0]);
88
def unique_by(f): group_by(f) | map(.[0]);
99
def max_by(f): _max_by_impl(map([f]));
1010
def min_by(f): _min_by_impl(map([f]));
11-
def add: reduce .[] as $x (null; . + $x);
11+
def add(f): reduce f as $x (null; . + $x);
12+
def add: add(.[]);
1213
def del(f): delpaths([path(f)]);
1314
def abs: if . < 0 then - . else . end;
1415
def _assign(paths; $value): reduce path(paths) as $p (.; setpath($p; $value));

tests/jq.test

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,15 @@ null
6161
null
6262
"interpolation"
6363

64-
@text,@json,([1,.]|@csv,@tsv),@html,@uri,@sh,(@base64|.,@base64d)
64+
@text,@json,([1,.]|@csv,@tsv),@html,(@uri|.,@urid),@sh,(@base64|.,@base64d)
6565
"!()<>&'\"\t"
6666
"!()<>&'\"\t"
6767
"\"!()<>&'\\\"\\t\""
6868
"1,\"!()<>&'\"\"\t\""
6969
"1\t!()<>&'\"\\t"
7070
"!()&lt;&gt;&amp;&apos;&quot;\t"
7171
"%21%28%29%3C%3E%26%27%22%09"
72+
"!()<>&'\"\t"
7273
"'!()<>&'\\''\"\t'"
7374
"ISgpPD4mJyIJ"
7475
"!()<>&'\"\t"
@@ -86,6 +87,10 @@ null
8687
"\u03bc"
8788
"%CE%BC"
8889

90+
@urid
91+
"%CE%BC"
92+
"\u03bc"
93+
8994
@html "<b>\(.)</b>"
9095
"<script>hax</script>"
9196
"<b>&lt;script&gt;hax&lt;/script&gt;</b>"
@@ -642,6 +647,19 @@ map_values(.+1)
642647
[0,1,2]
643648
[1,2,3]
644649

650+
[add(null), add(range(range(10))), add(empty), add(10,range(10))]
651+
null
652+
[null,120,null,55]
653+
654+
# Real-world use case for add(empty)
655+
.sum = add(.arr[])
656+
{"arr":[]}
657+
{"arr":[],"sum":null}
658+
659+
add({(.[]):1}) | keys
660+
["a","a","b","a","d","b","d","a","d"]
661+
["a","b","d"]
662+
645663
#
646664
# User-defined functions
647665
# Oh god.

tests/man.test

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/uri.test

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Tests are groups of three lines: program, input, expected output
2+
# Blank lines and lines starting with # are ignored
3+
4+
@uri
5+
"<>&'\"\t"
6+
"%3C%3E%26%27%22%09"
7+
8+
# decoding encoded output results in same text
9+
(@uri|@urid)
10+
"<>&'\"\t"
11+
"<>&'\"\t"
12+
13+
# testing variable length unicode characters
14+
@uri
15+
"a \u03bc \u2230 \ud83d\ude0e"
16+
"a%20%CE%BC%20%E2%88%B0%20%F0%9F%98%8E"
17+
18+
@urid
19+
"a%20%CE%BC%20%E2%88%B0%20%F0%9F%98%8E"
20+
"a \u03bc \u2230 \ud83d\ude0e"
21+
22+
### invalid uri strings
23+
24+
# unicode character should be length 4 (not 3)
25+
. | try @urid catch .
26+
"%F0%93%81"
27+
"string (\"%F0%93%81\") is not a valid uri encoding"
28+
29+
# invalid hex value ('FX')
30+
. | try @urid catch .
31+
"%FX%9F%98%8E"
32+
"string (\"%FX%9F%98%8E\") is not a valid uri encoding"
33+
34+
# trailing utf-8 octets must be formatted like 10xxxxxx
35+
# 'C0' = 11000000 invalid
36+
. | try @urid catch .
37+
"%F0%C0%81%8E"
38+
"string (\"%F0%C0%81%8E\") is not a valid uri encoding"

tests/uritest

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/bin/sh
2+
3+
. "${0%/*}/setup" "$@"
4+
5+
$VALGRIND $Q $JQ -L "$mods" --run-tests $JQTESTDIR/uri.test

0 commit comments

Comments
 (0)