Skip to content

Commit 7b6b6f1

Browse files
committed
unicode: immortalize interned strings
1 parent b6b12a9 commit 7b6b6f1

File tree

2 files changed

+80
-4
lines changed

2 files changed

+80
-4
lines changed

Include/internal/pycore_global_objects.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,14 @@ struct _Py_cached_objects {
3737
#define _Py_SINGLETON(NAME) \
3838
_Py_GLOBAL_OBJECT(singletons.NAME)
3939

40+
41+
/* Growable list of objects that have been made immortal, kept so they can
   be deallocated explicitly later (see unicode_free_immortalized()). */
struct _Py_immortalized_objects {
42+
_PyMutex mutex;  /* held by _PyUnicode_Immortalize() while it appends an entry */
43+
Py_ssize_t size;  /* number of slots of `array` currently in use */
44+
Py_ssize_t capacity;  /* number of slots currently allocated in `array` */
45+
PyObject **array;  /* slot storage, grown with PyMem_Realloc(); reset to NULL once freed */
46+
};
47+
4048
struct _Py_static_objects {
4149
struct {
4250
/* Small integers are preallocated in this array so that they
@@ -61,6 +69,8 @@ struct _Py_static_objects {
6169
PyHamtNode_Bitmap hamt_bitmap_node_empty;
6270
_PyContextTokenMissing context_token_missing;
6371
} singletons;
72+
73+
struct _Py_immortalized_objects immortal;
6474
};
6575

6676
#define _Py_INTERP_CACHED_OBJECT(interp, NAME) \

Objects/unicodeobject.c

Lines changed: 70 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1847,6 +1847,21 @@ PyUnicode_FromString(const char *u)
18471847
return PyUnicode_DecodeUTF8Stateful(u, (Py_ssize_t)size, NULL, NULL);
18481848
}
18491849

1850+
static PyObject **
1851+
resize_array(PyObject **array, Py_ssize_t *capacity)
1852+
{
1853+
Py_ssize_t old_size = *capacity;
1854+
Py_ssize_t new_size = Py_MAX(old_size * 2, 16);
1855+
Py_ssize_t item_size = sizeof(array[0]);
1856+
PyObject **new_array = PyMem_Realloc(array, new_size * item_size);
1857+
if (new_array == NULL) {
1858+
PyErr_NoMemory();
1859+
return NULL;
1860+
}
1861+
memset(&new_array[old_size], 0, (new_size - old_size) * item_size);
1862+
*capacity = new_size;
1863+
return new_array;
1864+
}
18501865

18511866
PyObject *
18521867
_PyUnicode_FromId(_Py_Identifier *id)
@@ -1909,6 +1924,34 @@ _PyUnicode_FromId(_Py_Identifier *id)
19091924
return obj;
19101925
}
19111926

1927+
static int
1928+
_PyUnicode_Immortalize(PyObject *obj)
1929+
{
1930+
assert(!_PyObject_IS_IMMORTAL(obj));
1931+
1932+
struct _Py_immortalized_objects *imm = &_PyRuntime.static_objects.immortal;
1933+
1934+
_PyMutex_lock(&imm->mutex);
1935+
Py_ssize_t index = imm->size;
1936+
if (index >= imm->capacity) {
1937+
Py_ssize_t capacity = imm->capacity;
1938+
PyObject **new_array = resize_array(imm->array, &capacity);
1939+
if (new_array == NULL) {
1940+
_PyMutex_unlock(&imm->mutex);
1941+
return -1;
1942+
}
1943+
1944+
imm->array = new_array;
1945+
imm->capacity = capacity;
1946+
}
1947+
1948+
_PyObject_SetImmortal(obj);
1949+
imm->array[index] = obj;
1950+
imm->size++;
1951+
_PyMutex_unlock(&imm->mutex);
1952+
return 0;
1953+
}
1954+
19121955

19131956
static void
19141957
unicode_clear_identifiers(struct _Py_unicode_state *state)
@@ -1924,6 +1967,18 @@ unicode_clear_identifiers(struct _Py_unicode_state *state)
19241967
// after Py_Finalize().
19251968
}
19261969

1970+
static void
1971+
unicode_free_immortalized(_PyRuntimeState *runtime)
1972+
{
1973+
struct _Py_immortalized_objects *imm = &runtime->static_objects.immortal;
1974+
for (Py_ssize_t i=0; i < imm->size; i++) {
1975+
_PyUnicode_ExactDealloc(imm->array[i]);
1976+
}
1977+
imm->size = 0;
1978+
PyMem_Free(imm->array);
1979+
imm->array = NULL;
1980+
imm->capacity = 0;
1981+
}
19271982

19281983
/* Internal function, doesn't check maximum character */
19291984

@@ -14623,11 +14678,18 @@ PyUnicode_InternInPlace(PyObject **p)
1462314678
return;
1462414679
}
1462514680

14626-
/* The two references in interned dict (key and value) are not counted by
14627-
refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of
14628-
this. */
14629-
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
1463014681
_PyUnicode_STATE(s).interned = 1;
14682+
14683+
if (_Py_ThreadLocal(t) && _PyUnicode_Immortalize(t) == 0) {
14684+
/* Nothing to do if we immortalize the string */
14685+
return;
14686+
}
14687+
else {
14688+
/* The two references in interned dict (key and value) are not counted by
14689+
refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of
14690+
this. */
14691+
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
14692+
}
1463114693
}
1463214694

1463314695
// Function kept for the stable ABI.
@@ -15136,6 +15198,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1513615198
interp->unicode.ucnhash_capi = NULL;
1513715199

1513815200
unicode_clear_identifiers(state);
15201+
15202+
if (_Py_IsMainInterpreter(interp)) {
15203+
unicode_free_immortalized(&_PyRuntime);
15204+
}
1513915205
}
1514015206

1514115207
/* A _string module, to export formatter_parser and formatter_field_name_split

0 commit comments

Comments
 (0)