Skip to content

Commit 7375726

Browse files
committed
size: stop using regexp
Using a regular expression brings in the whole regexp and regexp/syntax packages, which increase the resulting binary size by about 130K (Go 1.18.1, Linux/amd64). Besides, regular expressions are generally slow and incur some initialization overhead (to compile all global regexp.MustCompile variables). This, unlike the size difference, is not the main motivation for this commit, but it feels like it should have also been mentioned. A quick benchmark comparison shows a huge improvement (again, this is not why this is done, nevertheless it pleases the eye): name old time/op new time/op delta ParseSize-4 10.6µs ± 3% 2.6µs ±29% -75.10% (p=0.002 n=6+6) name old alloc/op new alloc/op delta ParseSize-4 3.26kB ± 0% 0.20kB ± 0% -93.75% (p=0.000 n=7+6) name old allocs/op new allocs/op delta ParseSize-4 72.0 ± 0% 26.0 ± 0% -63.89% (p=0.000 n=7+6) Compatibility note: As a result, we are now a bit more relaxed about the input, allowing e.g. ".4 Gb", or "-0", or "234. B", following the rules of strconv.ParseFloat. It seems that those were previously rejected as a result of a regex being used, not deliberately. Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
1 parent cec4960 commit 7375726

2 files changed

Lines changed: 74 additions & 27 deletions

File tree

size.go

Lines changed: 58 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package units
22

33
import (
44
"fmt"
5-
"regexp"
65
"strconv"
76
"strings"
87
)
@@ -26,16 +25,17 @@ const (
2625
PiB = 1024 * TiB
2726
)
2827

29-
type unitMap map[string]int64
28+
type unitMap map[byte]int64
3029

3130
var (
32-
decimalMap = unitMap{"k": KB, "m": MB, "g": GB, "t": TB, "p": PB}
33-
binaryMap = unitMap{"k": KiB, "m": MiB, "g": GiB, "t": TiB, "p": PiB}
34-
sizeRegex = regexp.MustCompile(`^(\d+(\.\d+)*) ?([kKmMgGtTpP])?[iI]?[bB]?$`)
31+
decimalMap = unitMap{'k': KB, 'm': MB, 'g': GB, 't': TB, 'p': PB}
32+
binaryMap = unitMap{'k': KiB, 'm': MiB, 'g': GiB, 't': TiB, 'p': PiB}
3533
)
3634

37-
var decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}
38-
var binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}
35+
var (
36+
decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}
37+
binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}
38+
)
3939

4040
func getSizeAndUnit(size float64, base float64, _map []string) (float64, string) {
4141
i := 0
@@ -89,20 +89,66 @@ func RAMInBytes(size string) (int64, error) {
8989

9090
// Parses the human-readable size string into the amount it represents.
9191
func parseSize(sizeStr string, uMap unitMap) (int64, error) {
92-
matches := sizeRegex.FindStringSubmatch(sizeStr)
93-
if len(matches) != 4 {
92+
// TODO: rewrite to use strings.Cut if there's a space
93+
// once Go < 1.18 is deprecated.
94+
sep := strings.LastIndexAny(sizeStr, "01234567890. ")
95+
if sep == -1 {
96+
// There should be at least a digit.
9497
return -1, fmt.Errorf("invalid size: '%s'", sizeStr)
9598
}
99+
var num, sfx string
100+
if sizeStr[sep] != ' ' {
101+
num = sizeStr[:sep+1]
102+
sfx = sizeStr[sep+1:]
103+
} else {
104+
// Omit the space separator.
105+
num = sizeStr[:sep]
106+
sfx = sizeStr[sep+1:]
107+
}
96108

97-
size, err := strconv.ParseFloat(matches[1], 64)
109+
size, err := strconv.ParseFloat(num, 64)
98110
if err != nil {
99111
return -1, err
100112
}
113+
// Backward compatibility: reject negative sizes.
114+
if size < 0 {
115+
return -1, fmt.Errorf("invalid size: '%s'", sizeStr)
116+
}
117+
118+
if len(sfx) == 0 {
119+
return int64(size), nil
120+
}
101121

102-
unitPrefix := strings.ToLower(matches[3])
103-
if mul, ok := uMap[unitPrefix]; ok {
122+
// Process the suffix.
123+
124+
if len(sfx) > 3 { // Too long.
125+
goto badSuffix
126+
}
127+
sfx = strings.ToLower(sfx)
128+
// Trivial case: b suffix.
129+
if sfx[0] == 'b' {
130+
if len(sfx) > 1 { // no extra characters allowed after b.
131+
goto badSuffix
132+
}
133+
return int64(size), nil
134+
}
135+
// A suffix from the map.
136+
if mul, ok := uMap[sfx[0]]; ok {
104137
size *= float64(mul)
138+
} else {
139+
goto badSuffix
140+
}
141+
142+
// The suffix may have extra "b" or "ib" (e.g. KiB or MB).
143+
switch {
144+
case len(sfx) == 2 && sfx[1] != 'b':
145+
goto badSuffix
146+
case len(sfx) == 3 && sfx[1:] != "ib":
147+
goto badSuffix
105148
}
106149

107150
return int64(size), nil
151+
152+
badSuffix:
153+
return -1, fmt.Errorf("invalid suffix: '%s'", sfx)
108154
}

size_test.go

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,22 @@ func TestFromHumanSize(t *testing.T) {
103103
assertSuccessEquals(t, 32.5*KB, FromHumanSize, "32.5 kB")
104104
assertSuccessEquals(t, 32, FromHumanSize, "32.5 B")
105105
assertSuccessEquals(t, 300, FromHumanSize, "0.3 K")
106+
assertSuccessEquals(t, 300, FromHumanSize, ".3kB")
107+
108+
assertSuccessEquals(t, 0, FromHumanSize, "0.")
109+
assertSuccessEquals(t, 0, FromHumanSize, "0. ")
110+
assertSuccessEquals(t, 0, FromHumanSize, "0.b")
111+
assertSuccessEquals(t, 0, FromHumanSize, "0.B")
112+
assertSuccessEquals(t, 0, FromHumanSize, "-0")
113+
assertSuccessEquals(t, 0, FromHumanSize, "-0b")
114+
assertSuccessEquals(t, 0, FromHumanSize, "-0B")
115+
assertSuccessEquals(t, 0, FromHumanSize, "-0 b")
116+
assertSuccessEquals(t, 0, FromHumanSize, "-0 B")
117+
assertSuccessEquals(t, 32, FromHumanSize, "32.")
118+
assertSuccessEquals(t, 32, FromHumanSize, "32.b")
119+
assertSuccessEquals(t, 32, FromHumanSize, "32.B")
120+
assertSuccessEquals(t, 32, FromHumanSize, "32. b")
121+
assertSuccessEquals(t, 32, FromHumanSize, "32. B")
106122

107123
// We do not tolerate extra leading or trailing spaces
108124
// (except for a space after the number and a missing suffix).
@@ -124,26 +140,11 @@ func TestFromHumanSize(t *testing.T) {
124140
assertError(t, FromHumanSize, " ")
125141
assertError(t, FromHumanSize, " .")
126142
assertError(t, FromHumanSize, " . ")
127-
assertError(t, FromHumanSize, "0.")
128-
assertError(t, FromHumanSize, "0. ")
129-
assertError(t, FromHumanSize, "0.b")
130-
assertError(t, FromHumanSize, "0.B")
131-
assertError(t, FromHumanSize, "-0")
132-
assertError(t, FromHumanSize, "-0b")
133-
assertError(t, FromHumanSize, "-0B")
134-
assertError(t, FromHumanSize, "-0 b")
135-
assertError(t, FromHumanSize, "-0 B")
136143
assertError(t, FromHumanSize, "-32")
137-
assertError(t, FromHumanSize, ".3kB")
138144
assertError(t, FromHumanSize, "-32b")
139145
assertError(t, FromHumanSize, "-32B")
140146
assertError(t, FromHumanSize, "-32 b")
141147
assertError(t, FromHumanSize, "-32 B")
142-
assertError(t, FromHumanSize, "32.")
143-
assertError(t, FromHumanSize, "32.b")
144-
assertError(t, FromHumanSize, "32.B")
145-
assertError(t, FromHumanSize, "32. b")
146-
assertError(t, FromHumanSize, "32. B")
147148
assertError(t, FromHumanSize, "32b.")
148149
assertError(t, FromHumanSize, "32B.")
149150
assertError(t, FromHumanSize, "32 b.")

0 commit comments

Comments
 (0)