Skip to content

Commit a6265d5

Browse files
committed
hash.c: use a salted, higher-quality hash function instead of Murmur.
Use a safer hash function for integer hashing. Multiplying both (v ^ seed[0]) and ((v<<32|v>>32) ^ seed[1]) fixes the pathological case of Murmur. On a 64-bit platform it looks like: u64 next(u64 prev, u64 v) { u64 h1 = (v ^ seed[0]) * m; u64 h2 = ((v<<32|v>>32) ^ seed[1]) * m; h1 ^= prev; h1 = (h1<<32|h1>>32); h1 *= m; h2 = (h2<<31|h2>>33); h2 ^= prev; h2 *= m; return h1 ^ h2 ^ (h1 >> 32) ^ (h2 >> 33); }
1 parent b8fcb0a commit a6265d5

File tree

6 files changed

+93
-75
lines changed

6 files changed

+93
-75
lines changed

hash.c

Lines changed: 25 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,6 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
152152
st_index_t hnum;
153153

154154
if (SPECIAL_CONST_P(a)) {
155-
if (a == Qundef) return 0;
156155
if (STATIC_SYM_P(a)) {
157156
hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
158157
goto out;
@@ -175,8 +174,7 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
175174
}
176175
else if (BUILTIN_TYPE(a) == T_FLOAT) {
177176
flt:
178-
hval = rb_dbl_hash(rb_float_value(a));
179-
hnum = FIX2LONG(hval);
177+
hnum = rb_dbl_hash_long(rb_float_value(a));
180178
}
181179
else {
182180
hnum = other_func(a);
@@ -199,35 +197,31 @@ rb_any_hash(VALUE a)
199197
return any_hash(a, obj_any_hash);
200198
}
201199

202-
static st_index_t
203-
rb_num_hash_start(st_index_t n)
204-
{
205-
/*
206-
* Murmur2 like finalization to ensure all bits are mixed into lower bits.
207-
*/
208-
#define MurmurMagic_1 (st_index_t)0xc6a4a793
209-
#define MurmurMagic_2 (st_index_t)0x5bd1e995
210-
#if SIZEOF_ST_INDEX_T > 4
211-
#define MurmurMagic ((MurmurMagic_1 << 32) | MurmurMagic_2)
212-
n ^= n >> 24; n ^= n >> 35;
213-
n *= MurmurMagic; n ^= n >> 40;
214-
#else
215-
#define MurmurMagic MurmurMagic_2
216-
n ^= n >> 11; n ^= n >> 12;
217-
n *= MurmurMagic; n ^= n >> 20;
218-
#endif
219-
return n;
200+
long
201+
rb_dbl_hash_long(double d)
202+
{
203+
long hash;
204+
#define ind_in_dbl type_roomof(double, st_index_t)
205+
union {
206+
st_index_t i[ind_in_dbl];
207+
double d;
208+
} v = { {0} };
209+
/* normalize -0.0 to 0.0 */
210+
v.d = d == 0.0 ? 0.0 : d;
211+
hash = rb_hash_end(v.i[0]);
212+
if (ind_in_dbl > 1) {
213+
unsigned i;
214+
for (i = 1; i < ind_in_dbl; i++) {
215+
hash = rb_hash_uint(hash, v.i[i]);
216+
}
217+
}
218+
return hash;
220219
}
221220

222221
long
223222
rb_objid_hash(st_index_t index)
224223
{
225-
st_index_t hnum = rb_num_hash_start(index);
226-
227-
hnum = rb_hash_start(hnum);
228-
hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
229-
hnum = rb_hash_end(hnum);
230-
return hnum;
224+
return rb_hash_end(index);
231225
}
232226

233227
static st_index_t
@@ -259,18 +253,7 @@ static const struct st_hash_type objhash = {
259253
static st_index_t
260254
rb_ident_hash(st_data_t n)
261255
{
262-
#ifdef USE_FLONUM /* RUBY */
263-
/*
264-
* - flonum (on 64-bit) is pathologically bad, mix the actual
265-
* float value in, but do not use the float value as-is since
266-
* many integers get interpreted as 2.0 or -2.0 [Bug #10761]
267-
*/
268-
if (FLONUM_P(n)) {
269-
n ^= (st_data_t)rb_float_value(n);
270-
}
271-
#endif
272-
273-
return (st_index_t)rb_num_hash_start((st_index_t)n);
256+
return (st_index_t)rb_hash_end((st_index_t)n);
274257
}
275258

276259
static const struct st_hash_type identhash = {
@@ -2183,11 +2166,10 @@ static int
21832166
hash_i(VALUE key, VALUE val, VALUE arg)
21842167
{
21852168
st_index_t *hval = (st_index_t *)arg;
2186-
st_index_t hdata[2];
2169+
st_index_t h;
21872170

2188-
hdata[0] = rb_hash(key);
2189-
hdata[1] = rb_hash(val);
2190-
*hval ^= st_hash(hdata, sizeof(hdata), 0);
2171+
h = rb_hash_uint(*hval, rb_hash(key));
2172+
*hval = rb_hash_uint(h, rb_hash(val));
21912173
return ST_CONTINUE;
21922174
}
21932175

include/ruby/intern.h

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -757,14 +757,11 @@ VALUE rb_str_cat_cstr(VALUE, const char*);
757757
VALUE rb_str_cat2(VALUE, const char*);
758758
VALUE rb_str_append(VALUE, VALUE);
759759
VALUE rb_str_concat(VALUE, VALUE);
760-
st_index_t rb_memhash(const void *ptr, long len);
761-
st_index_t rb_hash_start(st_index_t);
762-
st_index_t rb_hash_uint32(st_index_t, uint32_t);
763-
st_index_t rb_hash_uint(st_index_t, st_index_t);
764-
st_index_t rb_hash_end(st_index_t);
765-
#define rb_hash_uint32(h, i) st_hash_uint32((h), (i))
766-
#define rb_hash_uint(h, i) st_hash_uint((h), (i))
767-
#define rb_hash_end(h) st_hash_end(h)
760+
PUREFUNC(st_index_t rb_memhash(const void *ptr, long len));
761+
PUREFUNC(st_index_t rb_hash_start(st_index_t));
762+
PUREFUNC(st_index_t rb_hash_uint32(st_index_t, uint32_t));
763+
PUREFUNC(st_index_t rb_hash_uint(st_index_t, st_index_t));
764+
PUREFUNC(st_index_t rb_hash_end(st_index_t));
768765
st_index_t rb_str_hash(VALUE);
769766
int rb_str_hash_cmp(VALUE,VALUE);
770767
int rb_str_comparable(VALUE, VALUE);

internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,6 +1107,7 @@ VALUE rb_int_modulo(VALUE x, VALUE y);
11071107
VALUE rb_int_round(VALUE num, int ndigits);
11081108
VALUE rb_int2str(VALUE num, int base);
11091109
VALUE rb_dbl_hash(double d);
1110+
long rb_dbl_hash_long(double d);
11101111
VALUE rb_fix_plus(VALUE x, VALUE y);
11111112
VALUE rb_int_ge(VALUE x, VALUE y);
11121113

numeric.c

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,12 +1241,7 @@ flo_hash(VALUE num)
12411241
VALUE
12421242
rb_dbl_hash(double d)
12431243
{
1244-
st_index_t hash;
1245-
1246-
/* normalize -0.0 to 0.0 */
1247-
if (d == 0.0) d = 0.0;
1248-
hash = rb_memhash(&d, sizeof(d));
1249-
return LONG2FIX(hash);
1244+
return LONG2FIX(rb_dbl_hash_long(d));
12501245
}
12511246

12521247
VALUE

object.c

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -162,15 +162,7 @@ rb_obj_equal(VALUE obj1, VALUE obj2)
162162
VALUE
163163
rb_obj_hash(VALUE obj)
164164
{
165-
VALUE oid = rb_obj_id(obj);
166-
#if SIZEOF_LONG == SIZEOF_VOIDP
167-
st_index_t index = NUM2LONG(oid);
168-
#elif SIZEOF_LONG_LONG == SIZEOF_VOIDP
169-
st_index_t index = NUM2LL(oid);
170-
#else
171-
# error not supported
172-
#endif
173-
return LONG2FIX(rb_objid_hash(index));
165+
/* stub for documentation */
174166
}
175167
#else
176168
VALUE rb_obj_hash(VALUE obj);

random.c

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1473,7 +1473,7 @@ random_s_rand(int argc, VALUE *argv, VALUE obj)
14731473
#endif
14741474
#include "siphash.c"
14751475

1476-
static st_index_t hashseed;
1476+
static st_index_t hashseed[3];
14771477
typedef uint8_t sipseed_keys_t[16];
14781478
static union {
14791479
sipseed_keys_t key;
@@ -1483,19 +1483,22 @@ static union {
14831483
static void
14841484
init_hashseed(struct MT *mt)
14851485
{
1486-
hashseed = genrand_int32(mt);
1486+
int i;
1487+
for (i=0; i < numberof(hashseed); i++) {
1488+
hashseed[i] = genrand_int32(mt);
14871489
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 4*8
1488-
hashseed <<= 32;
1489-
hashseed |= genrand_int32(mt);
1490+
hashseed[i] <<= 32;
1491+
hashseed[i] |= genrand_int32(mt);
14901492
#endif
14911493
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1492-
hashseed <<= 32;
1493-
hashseed |= genrand_int32(mt);
1494+
hashseed[i] <<= 32;
1495+
hashseed[i] |= genrand_int32(mt);
14941496
#endif
14951497
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 12*8
1496-
hashseed <<= 32;
1497-
hashseed |= genrand_int32(mt);
1498+
hashseed[i] <<= 32;
1499+
hashseed[i] |= genrand_int32(mt);
14981500
#endif
1501+
}
14991502
}
15001503

15011504
static void
@@ -1507,12 +1510,60 @@ init_siphash(struct MT *mt)
15071510
sipseed.u32[i] = genrand_int32(mt);
15081511
}
15091512

1513+
#define MurmurMagic_1 (st_index_t)0xc6a4a793
1514+
#define MurmurMagic_2 (st_index_t)0x5bd1e995
1515+
#if ST_INDEX_BITS > 4*8
1516+
#define MurmurMagic ((MurmurMagic_1 << 32) | MurmurMagic_2)
1517+
#else
1518+
#define MurmurMagic MurmurMagic_2
1519+
#endif
1520+
#define half (ST_INDEX_BITS/2)
1521+
1522+
static inline st_index_t
1523+
rb_hash_next(st_index_t h, st_index_t v)
1524+
{
1525+
st_index_t h1 = (v ^ hashseed[1]) * MurmurMagic;
1526+
st_index_t h2 = ((v<<half|v>>half) ^ hashseed[2]) * MurmurMagic;
1527+
h1 ^= h;
1528+
h1 = h1<<half|h1>>half;
1529+
h1 *= MurmurMagic;
1530+
h2 = h2<<(half-1)|h2>>(half+1);
1531+
h2 ^= h;
1532+
h2 *= MurmurMagic;
1533+
#if ST_INDEX_BITS > (8*8)
1534+
h1 ^= h1 >> (half-1);
1535+
h2 ^= h2 >> (half-1);
1536+
h1 *= MurmurMagic;
1537+
h2 *= MurmurMagic;
1538+
#endif
1539+
return h1 ^ h2 ^ (h1 >> half) ^ (h2 >> (half+1));
1540+
}
1541+
1542+
st_index_t
1543+
rb_hash_uint(st_index_t h, st_index_t v)
1544+
{
1545+
return rb_hash_next(h, v);
1546+
}
1547+
1548+
st_index_t
1549+
rb_hash_uint32(st_index_t h, uint32_t v)
1550+
{
1551+
return rb_hash_next(h, v);
1552+
}
1553+
15101554
st_index_t
15111555
rb_hash_start(st_index_t h)
15121556
{
1513-
return st_hash_start(hashseed + h);
1557+
return h + hashseed[0];
15141558
}
15151559

1560+
st_index_t
1561+
rb_hash_end(st_index_t h)
1562+
{
1563+
return rb_hash_next(0, h);
1564+
}
1565+
1566+
15161567
st_index_t
15171568
rb_memhash(const void *ptr, long len)
15181569
{

0 commit comments

Comments
 (0)