Skip to content

Commit 4866769

Browse files
committed
refactor: move bsearch function to C-code
This commit fixes issue #527 and moves the bsearch function to native C code. The performance is a bit better: Testing script: ```bash clear if [[ `uname` == Darwin ]]; then MAX_MEMORY_UNITS=KB else MAX_MEMORY_UNITS=MB fi export TIMEFMT='%J %U user %S system %P cpu %*E total'$'\n'\ 'avg shared (code): %X KB'$'\n'\ 'avg unshared (data/stack): %D KB'$'\n'\ 'total (sum): %K KB'$'\n'\ 'max memory: %M '$MAX_MEMORY_UNITS''$'\n'\ 'page faults from disk: %F'$'\n'\ 'other page faults: %R' echo "JQ code bsearch" time /usr/bin/jq -n '[range(30000000)] | bsearch(3000)' echo "C code bsearch" time ./jq -n '[range(30000000)] | bsearch(3000)' ``` Results: ``` JQ code bsearch 3000 /usr/bin/jq -n '[range(30000000)] | bsearch(3000)' 8.63s user 0.77s system 98% cpu 9.542 total avg shared (code): 0 KB avg unshared (data/stack): 0 KB total (sum): 0 KB max memory: 823 MB page faults from disk: 1 other page faults: 432828 C code bsearch 3000 ./jq -n '[range(30000000)] | bsearch(3000)' 8.44s user 0.74s system 99% cpu 9.249 total avg shared (code): 0 KB avg unshared (data/stack): 0 KB total (sum): 0 KB max memory: 824 MB page faults from disk: 0 other page faults: 432766 ``` The results might be better if we could use jvp_array_read, so that there would be no need to copy/free the input array in each iteration. I guess it is done this way for API purposes, for when libjq is used with multiple threads. Signed-off-by: Eloy Coto <[email protected]>
1 parent 13e02ba commit 4866769

File tree

3 files changed

+60
-32
lines changed

3 files changed

+60
-32
lines changed

src/builtin.c

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ void *alloca (size_t);
4646
#include "jv_private.h"
4747
#include "util.h"
4848

49-
5049
#define BINOP(name) \
5150
static jv f_ ## name(jq_state *jq, jv input, jv a, jv b) { \
5251
jv_free(input); \
@@ -780,6 +779,61 @@ static jv f_sort_by_impl(jq_state *jq, jv input, jv keys) {
780779
}
781780
}
782781

782+
/* Assuming the input array is sorted, bsearch/1 returns */
783+
/* the index of the target if the target is in the input array; and otherwise */
784+
/* (-1 - ix), where ix is the insertion point that would leave the array sorted. */
785+
/* If the input is not sorted, bsearch will terminate but with irrelevant results. */
786+
static jv f_bsearch(jq_state *jq, jv input, jv target) {
787+
assert(jv_get_kind(input) == JV_KIND_ARRAY);
788+
int len = jv_array_length(jv_copy(input));
789+
if (len == 0) {
790+
jv_free(input);
791+
jv_free(target);
792+
return jv_number(-1);
793+
} else if (len == 1) {
794+
int result = jv_cmp(target, jv_array_get(input, 0));
795+
if (result == 0 ) {
796+
return jv_number(0);
797+
} else if (result > 0) {
798+
return jv_number(-2);
799+
} else {
800+
return jv_number(-1);
801+
}
802+
}
803+
804+
int start = 0;
805+
int end = len - 1;
806+
jv answer = jv_null();
807+
while (start <end) {
808+
int mid = (start + end) / 2;
809+
int result = jv_cmp(jv_copy(target), jv_array_get(jv_copy(input), mid));
810+
if (result == 0) {
811+
answer = jv_number(mid);
812+
break;
813+
} else if (start == end ) {
814+
answer = jv_number(-1);
815+
break;
816+
} else if (result < 0 ) {
817+
end = mid -1;
818+
} else {
819+
start = mid +1;
820+
}
821+
}
822+
if (jv_get_kind(answer) == JV_KIND_NULL) {
823+
int result = jv_cmp(target, jv_array_get(jv_copy(input), start));
824+
if (result < 0) {
825+
answer = jv_number(-1 - start);
826+
}else {
827+
answer = jv_number(-2 - start);
828+
}
829+
} else {
830+
jv_free(target);
831+
}
832+
833+
jv_free(input);
834+
return answer;
835+
}
836+
783837
static jv f_group_by_impl(jq_state *jq, jv input, jv keys) {
784838
if (jv_get_kind(input) == JV_KIND_ARRAY &&
785839
jv_get_kind(keys) == JV_KIND_ARRAY &&
@@ -1718,6 +1772,7 @@ BINOPS
17181772
{f_sort, "sort", 1},
17191773
{f_sort_by_impl, "_sort_by_impl", 2},
17201774
{f_group_by_impl, "_group_by_impl", 2},
1775+
{f_bsearch, "bsearch", 2},
17211776
{f_min, "min", 1},
17221777
{f_max, "max", 1},
17231778
{f_min_by_impl, "_min_by_impl", 2},

src/builtin.jq

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -215,37 +215,6 @@ def tostream:
215215
getpath($p) |
216216
reduce path(.[]?) as $q ([$p, .]; [$p+$q]);
217217

218-
# Assuming the input array is sorted, bsearch/1 returns
219-
# the index of the target if the target is in the input array; and otherwise
220-
# (-1 - ix), where ix is the insertion point that would leave the array sorted.
221-
# If the input is not sorted, bsearch will terminate, but its result is meaningless.
222-
def bsearch($target):
223-
if length == 0 then -1
224-
elif length == 1 then
225-
if $target == .[0] then 0 elif $target < .[0] then -1 else -2 end
226-
else . as $in
227-
# state variable: [start, end, answer]
228-
# where start and end are the lower and upper (inclusive) offsets still to search.
229-
| [0, length-1, null]
230-
| until( .[0] > .[1] ;
231-
if .[2] != null then (.[1] = -1) # answer already found: force start > end to break out
232-
else
233-
( ( (.[1] + .[0]) / 2 ) | floor ) as $mid
234-
| $in[$mid] as $monkey
235-
| if $monkey == $target then (.[2] = $mid) # success: record the matching index
236-
elif .[0] == .[1] then (.[1] = -1) # failure: the only remaining candidate does not match
237-
elif $monkey < $target then (.[0] = ($mid + 1))
238-
else (.[1] = ($mid - 1))
239-
end
240-
end )
241-
| if .[2] == null then # no match: compute the insertion point from start
242-
if $in[ .[0] ] < $target then (-2 -.[0])
243-
else (-1 -.[0])
244-
end
245-
else .[2]
246-
end
247-
end;
248-
249218
# Apply f to composite entities recursively, and to atoms
250219
def walk(f):
251220
def w:

tests/jq.test

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1553,6 +1553,10 @@ bsearch(0,2,4)
15531553
1
15541554
-4
15551555

1556+
bsearch({x:1})
1557+
[{ "x": 0 },{ "x": 1 },{ "x": 2 }]
1558+
1
1559+
15561560
# strptime tests are in optional.test
15571561

15581562
strftime("%Y-%m-%dT%H:%M:%SZ")

0 commit comments

Comments
 (0)