Skip to content

Commit cfd1ecf

Browse files
committed
expand: add BracesSeq with a config parameter and error returns
BracesSeq is a streaming, error-returning replacement for Braces. A brace expansion that would produce more words than an internal limit (currently 16Ki) yields an error rather than taking too much memory or time. FieldsSeq now drives BracesSeq, so the gosh interpreter surfaces the error to stderr like bash does. An interp smoke test covers the end-to-end path. Braces is now deprecated, with a short explanation of why.
1 parent 4d2f0ee commit cfd1ecf

4 files changed

Lines changed: 168 additions & 58 deletions

File tree

expand/braces.go

Lines changed: 64 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
package expand
55

66
import (
7+
"fmt"
8+
"iter"
79
"strconv"
810
"strings"
911

@@ -15,28 +17,76 @@ import (
1517
// "foo{bar,baz}" will return two literal words, "foobar" and "foobaz".
1618
//
1719
// Note that the resulting words may share word parts.
20+
//
21+
// Deprecated: use [BracesSeq], which yields words lazily and reports an
22+
// error rather than letting a large sequence allocate huge amounts of memory.
1823
func Braces(word *syntax.Word) []*syntax.Word {
1924
var all []*syntax.Word
25+
bracesSeqRec(word, func(w *syntax.Word) bool {
26+
all = append(all, w)
27+
return true
28+
})
29+
return all
30+
}
31+
32+
// BracesSeq performs brace expansion on a word, given that it contains any
33+
// [syntax.BraceExp] parts. For example, the word with a brace expansion
34+
// "foo{bar,baz}" will return two literal words, "foobar" and "foobaz".
35+
//
36+
// The iteration yields an error and stops if the total expansion is too
37+
// large, including combinatorial blow-ups across multiple brace expansions
38+
// like {1..100}{1..100}{1..100}. This may be configurable with cfg in the
39+
// future; the parameter is entirely unused for now.
40+
//
41+
// Note that the resulting words may share word parts.
42+
func BracesSeq(cfg *Config, word *syntax.Word) iter.Seq2[*syntax.Word, error] {
43+
return func(yield func(*syntax.Word, error) bool) {
44+
// 16Ki expanded elements is more than any script should need in practice,
45+
// but it's small enough that we don't waste too much memory and CPU.
46+
const limit = 16 << 10
47+
count := 0
48+
bracesSeqRec(word, func(w *syntax.Word) bool {
49+
count++
50+
if count > limit {
51+
yield(nil, fmt.Errorf("brace expansion would exceed %d elements", limit))
52+
return false
53+
}
54+
return yield(w, nil)
55+
})
56+
}
57+
}
58+
59+
// bracesSeqRec yields each fully-expanded word descended from word.
60+
// It returns false if iteration should stop.
61+
func bracesSeqRec(word *syntax.Word, yield func(*syntax.Word) bool) bool {
2062
var left []syntax.WordPart
2163
for i, wp := range word.Parts {
2264
br, ok := wp.(*syntax.BraceExp)
2365
if !ok {
2466
left = append(left, wp)
2567
continue
2668
}
69+
rest := word.Parts[i+1:]
70+
// Yield each word produced by recursing on `next`,
71+
// after prepending `left` to its Parts.
72+
expand := func(next *syntax.Word) bool {
73+
return bracesSeqRec(next, func(w *syntax.Word) bool {
74+
w.Parts = append(append([]syntax.WordPart(nil), left...), w.Parts...)
75+
return yield(w)
76+
})
77+
}
2778
if br.Sequence {
28-
chars := false
29-
3079
fromLit := br.Elems[0].Lit()
3180
toLit := br.Elems[1].Lit()
3281
zeros := max(extraLeadingZeros(fromLit), extraLeadingZeros(toLit))
3382

83+
chars := false
3484
from, err1 := strconv.Atoi(fromLit)
3585
to, err2 := strconv.Atoi(toLit)
3686
if err1 != nil || err2 != nil {
3787
chars = true
38-
from = int(br.Elems[0].Lit()[0])
39-
to = int(br.Elems[1].Lit()[0])
88+
from = int(fromLit[0])
89+
to = int(toLit[0])
4090
}
4191
upward := from <= to
4292
incr := 1
@@ -49,45 +99,31 @@ func Braces(word *syntax.Word) []*syntax.Word {
4999
incr = n
50100
}
51101
}
52-
n := from
53-
for {
54-
if upward && n > to {
55-
break
56-
}
57-
if !upward && n < to {
58-
break
59-
}
102+
for n := from; (upward && n <= to) || (!upward && n >= to); n += incr {
60103
next := *word
61-
next.Parts = next.Parts[i+1:]
62104
lit := &syntax.Lit{}
63105
if chars {
64106
lit.Value = string(rune(n))
65107
} else {
66108
lit.Value = strings.Repeat("0", zeros) + strconv.Itoa(n)
67109
}
68-
next.Parts = append([]syntax.WordPart{lit}, next.Parts...)
69-
exp := Braces(&next)
70-
for _, w := range exp {
71-
w.Parts = append(left, w.Parts...)
110+
next.Parts = append([]syntax.WordPart{lit}, rest...)
111+
if !expand(&next) {
112+
return false
72113
}
73-
all = append(all, exp...)
74-
n += incr
75114
}
76-
return all
115+
return true
77116
}
78117
for _, elem := range br.Elems {
79118
next := *word
80-
next.Parts = next.Parts[i+1:]
81-
next.Parts = append(elem.Parts, next.Parts...)
82-
exp := Braces(&next)
83-
for _, w := range exp {
84-
w.Parts = append(left, w.Parts...)
119+
next.Parts = append(append([]syntax.WordPart(nil), elem.Parts...), rest...)
120+
if !expand(&next) {
121+
return false
85122
}
86-
all = append(all, exp...)
87123
}
88-
return all
124+
return true
89125
}
90-
return []*syntax.Word{{Parts: left}}
126+
return yield(&syntax.Word{Parts: left})
91127
}
92128

93129
func extraLeadingZeros(s string) int {

expand/braces_test.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,60 @@ func TestBraces(t *testing.T) {
185185
}
186186
}
187187

188+
func TestBracesSeq(t *testing.T) {
189+
t.Parallel()
190+
for _, tc := range braceTests {
191+
t.Run("", func(t *testing.T) {
192+
inStr := printWords(tc.in)
193+
wantStr := printWords(tc.want...)
194+
195+
inBraces := *tc.in
196+
syntax.SplitBraces(&inBraces)
197+
198+
var got []*syntax.Word
199+
for w, err := range BracesSeq(nil, &inBraces) {
200+
if err != nil {
201+
t.Fatalf("unexpected error on %q: %v", inStr, err)
202+
}
203+
got = append(got, w)
204+
}
205+
gotStr := printWords(got...)
206+
if gotStr != wantStr {
207+
t.Fatalf("mismatch in %q\nwant:\n%s\ngot: %s",
208+
inStr, wantStr, gotStr)
209+
}
210+
})
211+
}
212+
}
213+
214+
func TestBracesSeqError(t *testing.T) {
215+
t.Parallel()
216+
tests := []string{
217+
"{1..100000}",
218+
"a{0..9223372036854775807}b",
219+
"{-9223372036854775808..9223372036854775807}",
220+
"{1..1000000000..1}",
221+
"{1..100}{1..100}{1..100}",
222+
"{a,b,c,d}{1..100}{1..100}{1..50}",
223+
}
224+
for _, in := range tests {
225+
t.Run(in, func(t *testing.T) {
226+
word := &syntax.Word{Parts: []syntax.WordPart{lit(in)}}
227+
syntax.SplitBraces(word)
228+
var gotErr error
229+
for _, err := range BracesSeq(nil, word) {
230+
if err != nil {
231+
gotErr = err
232+
break
233+
}
234+
}
235+
if gotErr == nil {
236+
t.Fatalf("expected error for %q", in)
237+
}
238+
})
239+
}
240+
}
241+
188242
func wantBraceExpParts(t *testing.T, word *syntax.Word, want bool) {
189243
t.Helper()
190244
anyBrace := false

expand/expand.go

Lines changed: 42 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -460,43 +460,55 @@ func FieldsSeq(cfg *Config, words ...*syntax.Word) iter.Seq2[string, error] {
460460
cfg = prepareConfig(cfg)
461461
dir := cfg.envGet("PWD")
462462
return func(yield func(string, error) bool) {
463+
expandWord := func(w *syntax.Word) (stop bool) {
464+
wfields, err := cfg.wordFields(w.Parts)
465+
if err != nil {
466+
yield("", err)
467+
return true
468+
}
469+
for _, field := range wfields {
470+
path, doGlob := cfg.escapedGlobField(field)
471+
if doGlob && cfg.ReadDir2 != nil {
472+
// Note that globbing requires keeping a slice state, so it doesn't
473+
// really benefit from using an iterator.
474+
matches, err := cfg.glob(dir, path)
475+
if err != nil {
476+
// We avoid [errors.As] as it allocates,
477+
// and we know that [Config.glob] returns [pattern.Regexp] errors without wrapping.
478+
if _, ok := err.(*pattern.SyntaxError); !ok {
479+
yield("", err)
480+
return true
481+
}
482+
} else if len(matches) > 0 || cfg.NullGlob {
483+
for _, m := range matches {
484+
if !yield(m, nil) {
485+
return true
486+
}
487+
}
488+
continue
489+
}
490+
}
491+
if !yield(cfg.fieldJoin(field), nil) {
492+
return true
493+
}
494+
}
495+
return false
496+
}
463497
for _, word := range words {
464498
word := *word // make a copy, since SplitBraces replaces the Parts slice
465-
afterBraces := []*syntax.Word{&word}
466-
if syntax.SplitBraces(&word) {
467-
afterBraces = Braces(&word)
499+
if !syntax.SplitBraces(&word) {
500+
if expandWord(&word) {
501+
return
502+
}
503+
continue
468504
}
469-
for _, word2 := range afterBraces {
470-
wfields, err := cfg.wordFields(word2.Parts)
505+
for w, err := range BracesSeq(cfg, &word) {
471506
if err != nil {
472507
yield("", err)
473508
return
474509
}
475-
for _, field := range wfields {
476-
path, doGlob := cfg.escapedGlobField(field)
477-
if doGlob && cfg.ReadDir2 != nil {
478-
// Note that globbing requires keeping a slice state, so it doesn't
479-
// really benefit from using an iterator.
480-
matches, err := cfg.glob(dir, path)
481-
if err != nil {
482-
// We avoid [errors.As] as it allocates,
483-
// and we know that [Config.glob] returns [pattern.Regexp] errors without wrapping.
484-
if _, ok := err.(*pattern.SyntaxError); !ok {
485-
yield("", err)
486-
return
487-
}
488-
} else if len(matches) > 0 || cfg.NullGlob {
489-
for _, m := range matches {
490-
if !yield(m, nil) {
491-
return
492-
}
493-
}
494-
continue
495-
}
496-
}
497-
if !yield(cfg.fieldJoin(field), nil) {
498-
return
499-
}
510+
if expandWord(w) {
511+
return
500512
}
501513
}
502514
}

interp/interp_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3352,6 +3352,14 @@ done <<< 2`,
33523352
{`echo a{1\}2,3}b`, "a1}2b a3b\n"},
33533353
{`echo a{1\..2}b`, "a{1..2}b\n"},
33543354
{`echo \{\{iriname\}\}`, "{{iriname}}\n"},
3355+
{
3356+
"echo {1..100000}",
3357+
"brace expansion would exceed 16384 elements\n #IGNORE bash has no defensive limit below MaxInt",
3358+
},
3359+
{
3360+
"echo a{0..9999999999}b",
3361+
"brace expansion would exceed 16384 elements\n #JUSTERR bash errors with a different message",
3362+
},
33553363

33563364
// brace expansion in declarations
33573365
{"declare {A,B}_VAR=1; echo $A_VAR $B_VAR", "1 1\n"},

0 commit comments

Comments
 (0)