Skip to content

Commit dac9a9b

Browse files
committed
Add Zlib::Inflate#inflate :buffer keyword argument
If a buffer keyword argument is given, it is used as the buffer, instead of creating new strings. This can result in significantly lower memory usage during inflation. Implements #19
1 parent ba9793c commit dac9a9b

File tree

2 files changed

+128
-14
lines changed

2 files changed

+128
-14
lines changed

ext/zlib/zlib.c

Lines changed: 69 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,10 @@ max_uint(long n)
5656
#define MAX_UINT(n) (uInt)(n)
5757
#endif
5858

59-
static ID id_dictionaries, id_read;
59+
#define OPTHASH_GIVEN_P(opts) \
60+
(argc > 0 && !NIL_P((opts) = rb_check_hash_type(argv[argc-1])) && (--argc, 1))
61+
62+
static ID id_dictionaries, id_read, id_buffer;
6063

6164
/*--------- Prototypes --------*/
6265

@@ -130,7 +133,7 @@ static VALUE rb_inflate_s_allocate(VALUE);
130133
static VALUE rb_inflate_initialize(int, VALUE*, VALUE);
131134
static VALUE rb_inflate_s_inflate(VALUE, VALUE);
132135
static void do_inflate(struct zstream*, VALUE);
133-
static VALUE rb_inflate_inflate(VALUE, VALUE);
136+
static VALUE rb_inflate_inflate(int, VALUE*, VALUE);
134137
static VALUE rb_inflate_addstr(VALUE, VALUE);
135138
static VALUE rb_inflate_sync(VALUE, VALUE);
136139
static VALUE rb_inflate_sync_point_p(VALUE);
@@ -557,7 +560,8 @@ struct zstream {
557560
#define ZSTREAM_FLAG_CLOSING 0x8
558561
#define ZSTREAM_FLAG_GZFILE 0x10 /* disallows yield from expand_buffer for
559562
gzip*/
560-
#define ZSTREAM_FLAG_UNUSED 0x20
563+
#define ZSTREAM_REUSE_BUFFER 0x20
564+
#define ZSTREAM_FLAG_UNUSED 0x40
561565

562566
#define ZSTREAM_READY(z) ((z)->flags |= ZSTREAM_FLAG_READY)
563567
#define ZSTREAM_IS_READY(z) ((z)->flags & ZSTREAM_FLAG_READY)
@@ -566,6 +570,8 @@ struct zstream {
566570
#define ZSTREAM_IS_GZFILE(z) ((z)->flags & ZSTREAM_FLAG_GZFILE)
567571
#define ZSTREAM_BUF_FILLED(z) (NIL_P((z)->buf) ? 0 : RSTRING_LEN((z)->buf))
568572

573+
#define ZSTREAM_REUSE_BUFFER_P(z) ((z)->flags & ZSTREAM_REUSE_BUFFER)
574+
569575
#define ZSTREAM_EXPAND_BUFFER_OK 0
570576

571577
/* I think that a better value should be found,
@@ -642,11 +648,19 @@ zstream_expand_buffer(struct zstream *z)
642648
if (buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
643649
int state = 0;
644650

645-
rb_obj_reveal(z->buf, rb_cString);
651+
if (!ZSTREAM_REUSE_BUFFER_P(z)) {
652+
rb_obj_reveal(z->buf, rb_cString);
653+
}
646654

647655
rb_protect(rb_yield, z->buf, &state);
648656

649-
z->buf = Qnil;
657+
if (ZSTREAM_REUSE_BUFFER_P(z)) {
658+
rb_str_modify(z->buf);
659+
rb_str_set_len(z->buf, 0);
660+
}
661+
else {
662+
z->buf = Qnil;
663+
}
650664
zstream_expand_buffer_into(z, ZSTREAM_AVAIL_OUT_STEP_MAX);
651665

652666
if (state)
@@ -764,7 +778,9 @@ zstream_detach_buffer(struct zstream *z)
764778
}
765779
else {
766780
dst = z->buf;
767-
rb_obj_reveal(dst, rb_cString);
781+
if (!ZSTREAM_REUSE_BUFFER_P(z)) {
782+
rb_obj_reveal(dst, rb_cString);
783+
}
768784
}
769785

770786
z->buf = Qnil;
@@ -2013,8 +2029,8 @@ rb_inflate_add_dictionary(VALUE obj, VALUE dictionary)
20132029
* Document-method: Zlib::Inflate#inflate
20142030
*
20152031
* call-seq:
2016-
* inflate(deflate_string) -> String
2017-
* inflate(deflate_string) { |chunk| ... } -> nil
2032+
* inflate(deflate_string, buffer: nil) -> String
2033+
* inflate(deflate_string, buffer: nil) { |chunk| ... } -> nil
20182034
*
20192035
* Inputs +deflate_string+ into the inflate stream and returns the output from
20202036
* the stream. Calling this method, both the input and the output buffer of
@@ -2024,6 +2040,15 @@ rb_inflate_add_dictionary(VALUE obj, VALUE dictionary)
20242040
* If a block is given consecutive inflated chunks from the +deflate_string+
20252041
* are yielded to the block and +nil+ is returned.
20262042
*
2043+
* If a :buffer keyword argument is given and not nil:
2044+
*
2045+
* * The :buffer keyword should be a String, and will be used as the output buffer.
2046+
* Using this option can reuse the memory required during inflation.
2047+
* * When not passing a block, the return value will be the same object as the
2048+
* :buffer keyword argument.
2049+
* * When passing a block, the yielded chunks will be the same object as the
2050+
* :buffer keyword argument.
2051+
*
20272052
* Raises a Zlib::NeedDict exception if a preset dictionary is needed to
20282053
* decompress. Set the dictionary by Zlib::Inflate#set_dictionary and then
20292054
* call this method again with an empty string to flush the stream:
@@ -2047,10 +2072,37 @@ rb_inflate_add_dictionary(VALUE obj, VALUE dictionary)
20472072
* See also Zlib::Inflate.new
20482073
*/
20492074
static VALUE
2050-
rb_inflate_inflate(VALUE obj, VALUE src)
2075+
rb_inflate_inflate(int argc, VALUE* argv, VALUE obj)
20512076
{
20522077
struct zstream *z = get_zstream(obj);
2053-
VALUE dst;
2078+
VALUE dst, src, opts, buffer = Qnil;
2079+
2080+
if (OPTHASH_GIVEN_P(opts)) {
2081+
VALUE buf;
2082+
rb_get_kwargs(opts, &id_buffer, 0, 1, &buf);
2083+
if (buf != Qundef && buf != Qnil) {
2084+
buffer = StringValue(buf);
2085+
}
2086+
}
2087+
if (buffer != Qnil) {
2088+
if (!(ZSTREAM_REUSE_BUFFER_P(z) && z->buf == buffer)) {
2089+
long len = RSTRING_LEN(buffer);
2090+
if (len >= ZSTREAM_AVAIL_OUT_STEP_MAX) {
2091+
rb_str_modify(buffer);
2092+
}
2093+
else {
2094+
len = ZSTREAM_AVAIL_OUT_STEP_MAX - len;
2095+
rb_str_modify_expand(buffer, len);
2096+
}
2097+
rb_str_set_len(buffer, 0);
2098+
z->flags |= ZSTREAM_REUSE_BUFFER;
2099+
z->buf = buffer;
2100+
}
2101+
} else if (ZSTREAM_REUSE_BUFFER_P(z)) {
2102+
z->flags &= ~ZSTREAM_REUSE_BUFFER;
2103+
z->buf = Qnil;
2104+
}
2105+
rb_scan_args(argc, argv, "10", &src);
20542106

20552107
if (ZSTREAM_IS_FINISHED(z)) {
20562108
if (NIL_P(src)) {
@@ -2059,7 +2111,11 @@ rb_inflate_inflate(VALUE obj, VALUE src)
20592111
else {
20602112
StringValue(src);
20612113
zstream_append_buffer2(z, src);
2062-
dst = rb_str_new(0, 0);
2114+
if (ZSTREAM_REUSE_BUFFER_P(z)) {
2115+
dst = rb_str_resize(buffer, 0);
2116+
} else {
2117+
dst = rb_str_new(0, 0);
2118+
}
20632119
}
20642120
}
20652121
else {
@@ -4368,8 +4424,6 @@ zlib_gzip_end(struct gzfile *gz)
43684424
zstream_end(&gz->z);
43694425
}
43704426

4371-
#define OPTHASH_GIVEN_P(opts) \
4372-
(argc > 0 && !NIL_P((opts) = rb_check_hash_type(argv[argc-1])) && (--argc, 1))
43734427
static ID id_level, id_strategy;
43744428
static VALUE zlib_gzip_run(VALUE arg);
43754429

@@ -4616,7 +4670,7 @@ Init_zlib(void)
46164670
rb_define_alloc_func(cInflate, rb_inflate_s_allocate);
46174671
rb_define_method(cInflate, "initialize", rb_inflate_initialize, -1);
46184672
rb_define_method(cInflate, "add_dictionary", rb_inflate_add_dictionary, 1);
4619-
rb_define_method(cInflate, "inflate", rb_inflate_inflate, 1);
4673+
rb_define_method(cInflate, "inflate", rb_inflate_inflate, -1);
46204674
rb_define_method(cInflate, "<<", rb_inflate_addstr, 1);
46214675
rb_define_method(cInflate, "sync", rb_inflate_sync, 1);
46224676
rb_define_method(cInflate, "sync_point?", rb_inflate_sync_point_p, 0);
@@ -4827,6 +4881,7 @@ Init_zlib(void)
48274881

48284882
id_level = rb_intern("level");
48294883
id_strategy = rb_intern("strategy");
4884+
id_buffer = rb_intern("buffer");
48304885
#endif /* GZIP_SUPPORT */
48314886
}
48324887

test/zlib/test_zlib.rb

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,65 @@ def test_inflate_chunked
363363
}
364364
end
365365

366+
def test_inflate_buffer
367+
s = Zlib::Deflate.deflate("foo")
368+
z = Zlib::Inflate.new
369+
buf = String.new
370+
s = z.inflate(s, buffer: buf)
371+
assert_same(buf, s)
372+
buf = String.new
373+
s << z.inflate(nil, buffer: buf)
374+
assert_equal("foo", s)
375+
z.inflate("foo", buffer: buf) # ???
376+
z << "foo" # ???
377+
end
378+
379+
def test_inflate_buffer_partial_input
380+
deflated = Zlib::Deflate.deflate "\0"
381+
382+
z = Zlib::Inflate.new
383+
384+
inflated = "".dup
385+
386+
buf = String.new
387+
deflated.each_char do |byte|
388+
inflated << z.inflate(byte, buffer: buf)
389+
end
390+
391+
inflated << z.finish
392+
393+
assert_equal "\0", inflated
394+
end
395+
396+
def test_inflate_buffer_chunked
397+
# s = Zlib::Deflate.deflate("0" * 100_000)
398+
zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \
399+
"\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
400+
"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
401+
"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
402+
"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
403+
"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
404+
"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
405+
"\000\000\000\000\000\000\000\257\006\351\247BH"
406+
407+
chunks = []
408+
409+
z = Zlib::Inflate.new
410+
411+
buf = String.new
412+
z.inflate(zeros, buffer: buf) do |chunk|
413+
assert_same(buf, chunk)
414+
chunks << chunk.dup
415+
end
416+
417+
assert_equal [16384, 16384, 16384, 16384, 16384, 16384, 1696],
418+
chunks.map { |chunk| chunk.size }
419+
420+
assert chunks.all? { |chunk|
421+
chunk =~ /\A0+\z/
422+
}
423+
end
424+
366425
def test_inflate_chunked_break
367426
# zeros = Zlib::Deflate.deflate("0" * 100_000)
368427
zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \

0 commit comments

Comments
 (0)