8
8
#include "textflag.h"
9
9
10
10
// Register allocation.
11
- #define dst R0
12
- #define dstorig R1
13
- #define src R2
14
- #define dstend R3
15
- #define srcend R4
16
- #define match R5 // Match address.
17
- #define dict R6
18
- #define dictlen R7
19
- #define dictend R8
20
- #define token R9
21
- #define len R10 // Literal and match lengths.
22
- #define lenRem R11
23
- #define offset R12 // Match offset.
24
- #define tmp1 R13
25
- #define tmp2 R14
26
- #define tmp3 R15
27
- #define tmp4 R16
11
+ #define dst R0
12
+ #define dstorig R1
13
+ #define src R2
14
+ #define dstend R3
15
+ #define dstend16 R4 // dstend - 16
16
+ #define srcend R5
17
+ #define srcend16 R6 // srcend - 16
18
+ #define match R7 // Match address.
19
+ #define dict R8
20
+ #define dictlen R9
21
+ #define dictend R10
22
+ #define token R11
23
+ #define len R12 // Literal and match lengths.
24
+ #define lenRem R13
25
+ #define offset R14 // Match offset.
26
+ #define tmp1 R15
27
+ #define tmp2 R16
28
+ #define tmp3 R17
29
+ #define tmp4 R19
28
30
29
31
// func decodeBlock(dst, src, dict []byte) int
30
32
TEXT ·decodeBlock(SB), NOFRAME+NOSPLIT, $0 -80
@@ -36,6 +38,12 @@ TEXT ·decodeBlock(SB), NOFRAME+NOSPLIT, $0-80
36
38
CBZ srcend, shortSrc
37
39
ADD src, srcend
38
40
41
+ // dstend16 = max(dstend-16, 0) and similarly for srcend16.
42
+ SUBS $16 , dstend, dstend16
43
+ CSEL LO, ZR, dstend16, dstend16
44
+ SUBS $16 , srcend, srcend16
45
+ CSEL LO, ZR, srcend16, srcend16
46
+
39
47
LDP dict_base+48 (FP), (dict, dictlen)
40
48
ADD dict, dictlen, dictend
41
49
@@ -71,27 +79,31 @@ readLitlenDone:
71
79
// Copy literal.
72
80
SUBS $16 , len
73
81
BLO copyLiteralShort
74
- AND $15 , len, lenRem
75
82
76
83
copyLiteralLoop:
77
- SUBS $16 , len
78
84
LDP.P 16 (src), (tmp1, tmp2)
79
85
STP.P (tmp1, tmp2), 16 (dst)
86
+ SUBS $16 , len
80
87
BPL copyLiteralLoop
81
88
82
- // lenRem = len%16 is the remaining number of bytes we need to copy.
83
- // Since len was >= 16, we can do this in one load and one store,
84
- // overlapping with the last load and store, without worrying about
85
- // writing out of bounds.
86
- ADD lenRem, src
87
- ADD lenRem, dst
88
- LDP -16 (src), (tmp1, tmp2)
89
- STP (tmp1, tmp2), -16 (dst)
89
+ // Copy (final part of) literal of length 0-15.
90
+ // If we have >=16 bytes left in src and dst, just copy 16 bytes.
91
+ copyLiteralShort:
92
+ CMP dstend16, dst
93
+ CCMP LO, src, srcend16, $0b0010 // 0010 = preserve carry (LO).
94
+ BHS copyLiteralShortEnd
95
+
96
+ AND $15 , len
97
+
98
+ LDP (src), (tmp1, tmp2)
99
+ ADD len, src
100
+ STP (tmp1, tmp2), (dst)
101
+ ADD len, dst
90
102
91
103
B copyLiteralDone
92
104
93
- // Copy literal of length 0-15.
94
- copyLiteralShort :
105
+ // Safe but slow copy near the end of src or dst.
106
+ copyLiteralShortEnd :
95
107
TBZ $3 , len, 3 (PC)
96
108
MOVD .P 8 (src), tmp1
97
109
MOVD .P tmp1, 8 (dst)
@@ -159,7 +171,7 @@ copyDict:
159
171
CCMP NE, dictend, match, $0b0100 // 0100 sets the Z (EQ) flag.
160
172
BNE copyDict
161
173
162
- CBZ len, copyMatchDone
174
+ CBZ len, copyMatchDone
163
175
164
176
// If the match extends beyond the dictionary, the rest is at dstorig.
165
177
MOVD dstorig, match
0 commit comments