Skip to content

Commit 937a472

Browse files
committed
Merge remote-tracking branch 'origin/v4' into v4
2 parents 30805c8 + 4b2f324 commit 937a472

File tree

3 files changed

+60
-60
lines changed

3 files changed

+60
-60
lines changed

internal/lz4block/decode_arm64.s

Lines changed: 42 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,25 @@
88
#include "textflag.h"
99

1010
// Register allocation.
11-
#define dst R0
12-
#define dstorig R1
13-
#define src R2
14-
#define dstend R3
15-
#define srcend R4
16-
#define match R5 // Match address.
17-
#define dict R6
18-
#define dictlen R7
19-
#define dictend R8
20-
#define token R9
21-
#define len R10 // Literal and match lengths.
22-
#define lenRem R11
23-
#define offset R12 // Match offset.
24-
#define tmp1 R13
25-
#define tmp2 R14
26-
#define tmp3 R15
27-
#define tmp4 R16
11+
#define dst R0
12+
#define dstorig R1
13+
#define src R2
14+
#define dstend R3
15+
#define dstend16 R4 // dstend - 16
16+
#define srcend R5
17+
#define srcend16 R6 // srcend - 16
18+
#define match R7 // Match address.
19+
#define dict R8
20+
#define dictlen R9
21+
#define dictend R10
22+
#define token R11
23+
#define len R12 // Literal and match lengths.
24+
#define lenRem R13
25+
#define offset R14 // Match offset.
26+
#define tmp1 R15
27+
#define tmp2 R16
28+
#define tmp3 R17
29+
#define tmp4 R19
2830

2931
// func decodeBlock(dst, src, dict []byte) int
3032
TEXT ·decodeBlock(SB), NOFRAME+NOSPLIT, $0-80
@@ -36,6 +38,12 @@ TEXT ·decodeBlock(SB), NOFRAME+NOSPLIT, $0-80
3638
CBZ srcend, shortSrc
3739
ADD src, srcend
3840

41+
// dstend16 = max(dstend-16, 0) and similarly for srcend16.
42+
SUBS $16, dstend, dstend16
43+
CSEL LO, ZR, dstend16, dstend16
44+
SUBS $16, srcend, srcend16
45+
CSEL LO, ZR, srcend16, srcend16
46+
3947
LDP dict_base+48(FP), (dict, dictlen)
4048
ADD dict, dictlen, dictend
4149

@@ -71,27 +79,31 @@ readLitlenDone:
7179
// Copy literal.
7280
SUBS $16, len
7381
BLO copyLiteralShort
74-
AND $15, len, lenRem
7582

7683
copyLiteralLoop:
77-
SUBS $16, len
7884
LDP.P 16(src), (tmp1, tmp2)
7985
STP.P (tmp1, tmp2), 16(dst)
86+
SUBS $16, len
8087
BPL copyLiteralLoop
8188

82-
// lenRem = len%16 is the remaining number of bytes we need to copy.
83-
// Since len was >= 16, we can do this in one load and one store,
84-
// overlapping with the last load and store, without worrying about
85-
// writing out of bounds.
86-
ADD lenRem, src
87-
ADD lenRem, dst
88-
LDP -16(src), (tmp1, tmp2)
89-
STP (tmp1, tmp2), -16(dst)
89+
// Copy (final part of) literal of length 0-15.
90+
// If we have >=16 bytes left in src and dst, just copy 16 bytes.
91+
copyLiteralShort:
92+
CMP dstend16, dst
93+
CCMP LO, src, srcend16, $0b0010 // 0010 sets the C (HS) flag when dst >= dstend16.
94+
BHS copyLiteralShortEnd
95+
96+
AND $15, len
97+
98+
LDP (src), (tmp1, tmp2)
99+
ADD len, src
100+
STP (tmp1, tmp2), (dst)
101+
ADD len, dst
90102

91103
B copyLiteralDone
92104

93-
// Copy literal of length 0-15.
94-
copyLiteralShort:
105+
// Safe but slow copy near the end of src, dst.
106+
copyLiteralShortEnd:
95107
TBZ $3, len, 3(PC)
96108
MOVD.P 8(src), tmp1
97109
MOVD.P tmp1, 8(dst)
@@ -159,7 +171,7 @@ copyDict:
159171
CCMP NE, dictend, match, $0b0100 // 0100 sets the Z (EQ) flag.
160172
BNE copyDict
161173

162-
CBZ len, copyMatchDone
174+
CBZ len, copyMatchDone
163175

164176
// If the match extends beyond the dictionary, the rest is at dstorig.
165177
MOVD dstorig, match

internal/lz4block/decode_other.go

Lines changed: 11 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
//go:build (!amd64 && !arm && !arm64) || appengine || !gc || noasm
12
// +build !amd64,!arm,!arm64 appengine !gc noasm
23

34
package lz4block
@@ -10,7 +11,6 @@ func decodeBlock(dst, src, dict []byte) (ret int) {
1011
// Restrict capacities so we don't read or write out of bounds.
1112
dst = dst[:len(dst):len(dst)]
1213
src = src[:len(src):len(src)]
13-
dictLen := uint(len(dict))
1414

1515
const hasError = -2
1616
defer func() {
@@ -102,35 +102,17 @@ func decodeBlock(dst, src, dict []byte) (ret int) {
102102

103103
// Copy the match.
104104
if di < offset {
105-
// The match is beyond our block, meaning in the dictionary
106-
if offset-di > mLen {
107-
// The match is entirely contained in the dictionary. Just copy!
108-
copy(dst[di:di+mLen], dict[dictLen+di-offset:dictLen+di-offset+mLen])
109-
di = di + mLen
110-
} else {
111-
// The match stretches over the dictionary and our block
112-
copySize := offset - di
113-
restSize := mLen - copySize
114-
115-
copy(dst[di:di+copySize], dict[dictLen-copySize:])
116-
di = di + copySize
117-
118-
if di < restSize {
119-
// Overlap - we want to copy more than what we have available,
120-
// so copy byte per byte.
121-
copyFrom := 0
122-
endOfMatch := di + restSize
123-
for di < endOfMatch {
124-
dst[di] = dst[copyFrom]
125-
di = di + 1
126-
copyFrom = copyFrom + 1
127-
}
128-
} else {
129-
copy(dst[di:di+restSize], dst[0:restSize])
130-
di = di + restSize
131-
}
105+
// The match starts before the beginning of dst, meaning its first
106+
// part is in the dictionary.
107+
fromDict := dict[uint(len(dict))+di-offset:]
108+
n := uint(copy(dst[di:di+mLen], fromDict))
109+
di += n
110+
if mLen -= n; mLen == 0 {
111+
continue
132112
}
133-
continue
113+
// We copied n = offset-di bytes from the dictionary,
114+
// then set di = di+n = offset, so the following code
115+
// copies from dst[di-offset:] = dst[0:].
134116
}
135117

136118
expanded := dst[di-offset:]

internal/lz4block/decode_test.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,13 @@ func TestDecodeWithDict(t *testing.T) {
228228
// First part in dictionary, rest in dst.
229229
{"\x35foo\x09\x00\x401234", "0barbaz", "foobarbazfoo1234"},
230230

231-
// Same, but >16 bytes before the end,
231+
// Copy end of dictionary three times, then a literal.
232+
{"\x08\x04\x00\x50abcde", "---1234", "123412341234abcde"},
233+
234+
// First part in dictionary, rest in dst, copied multiple times.
235+
{"\x1a1\x05\x00\x50abcde", "---2345", "123451234512345abcde"},
236+
237+
// First part in dictionary, rest in dst, but >16 bytes before the end,
232238
// to test the short match shortcut in the amd64 decoder.
233239
{"\x35abc\x09\x00\xf0\x0f0123456789abcdefghijklmnopqrst", "012defghi",
234240
"abcdefghiabc0123456789abcdefghijklmnopqrst"},

0 commit comments

Comments
 (0)