Skip to content

Commit a4dd011

Browse files
committed
Tentative fix for a problem that Tim discovered at the last moment,
and reported to python-dev: because we were calling dict_resize() in PyDict_Next(), and because GC's dict_traverse() uses PyDict_Next(), and because PyTuple_New() can cause GC, and because dict_items() calls PyTuple_New(), it was possible for dict_items() to have the dict resized right under its nose. The solution is convoluted, and touches several places: keys(), values(), items(), popitem(), PyDict_Next(), and PyDict_SetItem(). There are two parts to it. First, we no longer call dict_resize() in PyDict_Next(), which seems to solve the immediate problem. But then PyDict_SetItem() must have a different policy about when *it* calls dict_resize(), because we want to guarantee (e.g. for an algorithm that Jeremy uses in the compiler) that you can loop over a dict using PyDict_Next() and make changes to the dict as long as those changes are only value replacements for existing keys using PyDict_SetItem(). This is done by resizing *after* the insertion instead of before, and by remembering the size before we insert the item, and if the size is still the same, we don't bother to even check if we might need to resize. An additional detail is that if the dict starts out empty, we must still resize it before the insertion. That was the first part. :-) The second part is to make keys(), values(), items(), and popitem() safe against side effects on the dict caused by allocations, under the assumption that if the GC can cause arbitrary Python code to run, it can cause other threads to run, and it's not inconceivable that our dict could be resized -- it would be insane to write code that relies on this, but not all code is sane. Now, I have this nagging feeling that the loops in lookdict probably are blissfully assuming that doing a simple key comparison does not change the dict's size. This is not necessarily true (the keys could be class instances after all). But that's a battle for another day.
1 parent 0aa30b0 commit a4dd011

File tree

1 file changed

+110
-61
lines changed

1 file changed

+110
-61
lines changed

Objects/dictobject.c

Lines changed: 110 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,8 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
242242
if (restore_error)
243243
PyErr_Restore(err_type, err_value, err_tb);
244244
return ep;
245-
}
246-
else if (ep->me_hash == hash) {
245+
}
246+
else if (ep->me_hash == hash) {
247247
if (!checked_error) {
248248
checked_error = 1;
249249
if (PyErr_Occurred()) {
@@ -289,7 +289,7 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
289289
register unsigned int mask = mp->ma_size-1;
290290
dictentry *ep0 = mp->ma_table;
291291
register dictentry *ep;
292-
cmpfunc compare = PyString_Type.tp_compare;
292+
cmpfunc compare = PyString_Type.tp_compare;
293293

294294
/* make sure this function doesn't have to handle non-string keys */
295295
if (!PyString_Check(key)) {
@@ -312,7 +312,7 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
312312
freeslot = ep;
313313
else {
314314
if (ep->me_hash == hash
315-
&& compare(ep->me_key, key) == 0) {
315+
&& compare(ep->me_key, key) == 0) {
316316
return ep;
317317
}
318318
freeslot = NULL;
@@ -338,7 +338,7 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
338338
|| (ep->me_hash == hash
339339
&& compare(ep->me_key, key) == 0)) {
340340
return ep;
341-
}
341+
}
342342
/* Cycle through GF(2^n)-{0} */
343343
incr = incr << 1;
344344
if (incr > mask)
@@ -454,11 +454,19 @@ PyDict_GetItem(PyObject *op, PyObject *key)
454454
return (mp->ma_lookup)(mp, key, hash)->me_value;
455455
}
456456

457+
/* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
458+
* dictionary if it is merely replacing the value for an existing key.
459+
* This is means that it's safe to loop over a dictionary with
460+
* PyDict_Next() and occasionally replace a value -- but you can't
461+
* insert new keys or remove them.
462+
*/
457463
int
458464
PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
459465
{
460466
register dictobject *mp;
461467
register long hash;
468+
register int n_used;
469+
462470
if (!PyDict_Check(op)) {
463471
PyErr_BadInternalCall();
464472
return -1;
@@ -486,18 +494,30 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
486494
if (hash == -1)
487495
return -1;
488496
}
489-
/* If fill >= 2/3 size, adjust size. Normally, this doubles the
497+
if (mp->ma_fill >= mp->ma_size) {
498+
/* No room for a new key.
499+
* This only happens when the dict is empty.
500+
* Let dictresize() create a minimal dict.
501+
*/
502+
assert(mp->ma_used == 0);
503+
if (dictresize(mp, 0) != 0)
504+
return -1;
505+
assert(mp->ma_fill < mp->ma_size);
506+
}
507+
n_used = mp->ma_used;
508+
Py_INCREF(value);
509+
Py_INCREF(key);
510+
insertdict(mp, key, hash, value);
511+
/* If we added a key, we can safely resize. Otherwise skip this!
512+
* If fill >= 2/3 size, adjust size. Normally, this doubles the
490513
* size, but it's also possible for the dict to shrink (if ma_fill is
491514
* much larger than ma_used, meaning a lot of dict keys have been
492515
* deleted).
493-
* CAUTION: this resize logic must match the logic in PyDict_Next.
494516
*/
495-
if (mp->ma_fill*3 >= mp->ma_size*2 &&
496-
dictresize(mp, mp->ma_used*2) != 0)
497-
return -1;
498-
Py_INCREF(value);
499-
Py_INCREF(key);
500-
insertdict(mp, key, hash, value);
517+
if (mp->ma_used > n_used && mp->ma_fill*3 >= mp->ma_size*2) {
518+
if (dictresize(mp, mp->ma_used*2) != 0)
519+
return -1;
520+
}
501521
return 0;
502522
}
503523

@@ -580,23 +600,6 @@ PyDict_Next(PyObject *op, int *ppos, PyObject **pkey, PyObject **pvalue)
580600
i = *ppos;
581601
if (i < 0)
582602
return 0;
583-
584-
/* A hack to support loops that merely change values.
585-
* The problem: PyDict_SetItem() can either grow or shrink the dict
586-
* even when passed a key that's already in the dict. This was a
587-
* repeated source of subtle bugs, bad enough to justify a hack here.
588-
* Approach: If this is the first time PyDict_Next() is being called
589-
* (i==0), first figure out whether PyDict_SetItem() *will* change the
590-
* size, and if so get it changed before we start passing out internal
591-
* indices.
592-
*/
593-
if (i == 0) {
594-
/* This must be a clone of PyDict_SetItem's resize logic. */
595-
if (mp->ma_fill*3 >= mp->ma_size*2 &&
596-
dictresize(mp, mp->ma_used*2) != 0)
597-
return -1;
598-
}
599-
600603
while (i < mp->ma_size && mp->ma_table[i].me_value == NULL)
601604
i++;
602605
*ppos = i+1;
@@ -757,17 +760,27 @@ static PyObject *
757760
dict_keys(register dictobject *mp, PyObject *args)
758761
{
759762
register PyObject *v;
760-
register int i, j;
763+
register int i, j, n;
764+
761765
if (!PyArg_NoArgs(args))
762766
return NULL;
763-
v = PyList_New(mp->ma_used);
767+
again:
768+
n = mp->ma_used;
769+
v = PyList_New(n);
764770
if (v == NULL)
765771
return NULL;
772+
if (n != mp->ma_used) {
773+
/* Durnit. The allocations caused the dict to resize.
774+
* Just start over, this shouldn't normally happen.
775+
*/
776+
Py_DECREF(v);
777+
goto again;
778+
}
766779
for (i = 0, j = 0; i < mp->ma_size; i++) {
767780
if (mp->ma_table[i].me_value != NULL) {
768781
PyObject *key = mp->ma_table[i].me_key;
769782
Py_INCREF(key);
770-
PyList_SetItem(v, j, key);
783+
PyList_SET_ITEM(v, j, key);
771784
j++;
772785
}
773786
}
@@ -778,17 +791,27 @@ static PyObject *
778791
dict_values(register dictobject *mp, PyObject *args)
779792
{
780793
register PyObject *v;
781-
register int i, j;
794+
register int i, j, n;
795+
782796
if (!PyArg_NoArgs(args))
783797
return NULL;
784-
v = PyList_New(mp->ma_used);
798+
again:
799+
n = mp->ma_used;
800+
v = PyList_New(n);
785801
if (v == NULL)
786802
return NULL;
803+
if (n != mp->ma_used) {
804+
/* Durnit. The allocations caused the dict to resize.
805+
* Just start over, this shouldn't normally happen.
806+
*/
807+
Py_DECREF(v);
808+
goto again;
809+
}
787810
for (i = 0, j = 0; i < mp->ma_size; i++) {
788811
if (mp->ma_table[i].me_value != NULL) {
789812
PyObject *value = mp->ma_table[i].me_value;
790813
Py_INCREF(value);
791-
PyList_SetItem(v, j, value);
814+
PyList_SET_ITEM(v, j, value);
792815
j++;
793816
}
794817
}
@@ -799,29 +822,49 @@ static PyObject *
799822
dict_items(register dictobject *mp, PyObject *args)
800823
{
801824
register PyObject *v;
802-
register int i, j;
825+
register int i, j, n;
826+
PyObject *item, *key, *value;
827+
803828
if (!PyArg_NoArgs(args))
804829
return NULL;
805-
v = PyList_New(mp->ma_used);
830+
/* Preallocate the list of tuples, to avoid allocations during
831+
* the loop over the items, which could trigger GC, which
832+
* could resize the dict. :-(
833+
*/
834+
again:
835+
n = mp->ma_used;
836+
v = PyList_New(n);
806837
if (v == NULL)
807838
return NULL;
839+
for (i = 0; i < n; i++) {
840+
item = PyTuple_New(2);
841+
if (item == NULL) {
842+
Py_DECREF(v);
843+
return NULL;
844+
}
845+
PyList_SET_ITEM(v, i, item);
846+
}
847+
if (n != mp->ma_used) {
848+
/* Durnit. The allocations caused the dict to resize.
849+
* Just start over, this shouldn't normally happen.
850+
*/
851+
Py_DECREF(v);
852+
goto again;
853+
}
854+
/* Nothing we do below makes any function calls. */
808855
for (i = 0, j = 0; i < mp->ma_size; i++) {
809856
if (mp->ma_table[i].me_value != NULL) {
810-
PyObject *key = mp->ma_table[i].me_key;
811-
PyObject *value = mp->ma_table[i].me_value;
812-
PyObject *item = PyTuple_New(2);
813-
if (item == NULL) {
814-
Py_DECREF(v);
815-
return NULL;
816-
}
857+
key = mp->ma_table[i].me_key;
858+
value = mp->ma_table[i].me_value;
859+
item = PyList_GET_ITEM(v, j);
817860
Py_INCREF(key);
818-
PyTuple_SetItem(item, 0, key);
861+
PyTuple_SET_ITEM(item, 0, key);
819862
Py_INCREF(value);
820-
PyTuple_SetItem(item, 1, value);
821-
PyList_SetItem(v, j, item);
863+
PyTuple_SET_ITEM(item, 1, value);
822864
j++;
823865
}
824866
}
867+
assert(j == n);
825868
return v;
826869
}
827870

@@ -830,7 +873,7 @@ dict_update(register dictobject *mp, PyObject *args)
830873
{
831874
register int i;
832875
dictobject *other;
833-
dictentry *entry;
876+
dictentry *entry;
834877
if (!PyArg_Parse(args, "O!", &PyDict_Type, &other))
835878
return NULL;
836879
if (other == mp || other->ma_used == 0)
@@ -870,7 +913,7 @@ PyDict_Copy(PyObject *o)
870913
register dictobject *mp;
871914
register int i;
872915
dictobject *copy;
873-
dictentry *entry;
916+
dictentry *entry;
874917

875918
if (o == NULL || !PyDict_Check(o)) {
876919
PyErr_BadInternalCall();
@@ -1117,6 +1160,15 @@ dict_popitem(dictobject *mp, PyObject *args)
11171160
"popitem(): dictionary is empty");
11181161
return NULL;
11191162
}
1163+
/* Allocate the result tuple first. Believe it or not,
1164+
* this allocation could trigger a garbage collection which
1165+
* could resize the dict, which would invalidate the pointer
1166+
* (ep) into the dict calculated below.
1167+
* So we have to do this first.
1168+
*/
1169+
res = PyTuple_New(2);
1170+
if (res == NULL)
1171+
return NULL;
11201172
/* Set ep to "the first" dict entry with a value. We abuse the hash
11211173
* field of slot 0 to hold a search finger:
11221174
* If slot 0 has a value, use slot 0.
@@ -1141,17 +1193,14 @@ dict_popitem(dictobject *mp, PyObject *args)
11411193
i = 1;
11421194
}
11431195
}
1144-
res = PyTuple_New(2);
1145-
if (res != NULL) {
1146-
PyTuple_SET_ITEM(res, 0, ep->me_key);
1147-
PyTuple_SET_ITEM(res, 1, ep->me_value);
1148-
Py_INCREF(dummy);
1149-
ep->me_key = dummy;
1150-
ep->me_value = NULL;
1151-
mp->ma_used--;
1152-
assert(mp->ma_table[0].me_value == NULL);
1153-
mp->ma_table[0].me_hash = i + 1; /* next place to start */
1154-
}
1196+
PyTuple_SET_ITEM(res, 0, ep->me_key);
1197+
PyTuple_SET_ITEM(res, 1, ep->me_value);
1198+
Py_INCREF(dummy);
1199+
ep->me_key = dummy;
1200+
ep->me_value = NULL;
1201+
mp->ma_used--;
1202+
assert(mp->ma_table[0].me_value == NULL);
1203+
mp->ma_table[0].me_hash = i + 1; /* next place to start */
11551204
return res;
11561205
}
11571206

0 commit comments

Comments
 (0)