diff --git a/Doc/howto/functional.rst b/Doc/howto/functional.rst index 552514063c95ab2..ebc7a100d91a646 100644 --- a/Doc/howto/functional.rst +++ b/Doc/howto/functional.rst @@ -1042,7 +1042,7 @@ first calculation. :: >>> functools.reduce(operator.concat, []) Traceback (most recent call last): ... - TypeError: reduce() of empty sequence with no initial value + TypeError: reduce() of empty iterable with no initial value >>> functools.reduce(operator.mul, [1, 2, 3], 1) 6 >>> functools.reduce(operator.mul, [], 1) diff --git a/Lib/shutil.py b/Lib/shutil.py index 4d5a283662101c4..5095318da233146 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -1307,12 +1307,6 @@ def unregister_unpack_format(name): """Removes the pack format from the registry.""" del _UNPACK_FORMATS[name] -def _ensure_directory(path): - """Ensure that the parent directory of `path` exists""" - dirname = os.path.dirname(path) - if not os.path.isdir(dirname): - os.makedirs(dirname) - def _unpack_zipfile(filename, extract_dir): """Unpack zip `filename` to `extract_dir` """ diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index d8e3b671ec229f9..64293d757331d75 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -1032,6 +1032,21 @@ def test_failure(self): # Previously, a second call could crash due to internal inconsistency self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30) + def test_decompress_after_data_error(self): + data = bytes.fromhex( + "425a6839314159265359000000000000007fffff000000000000000000000000" + "00000000000000000000000000000000000000e0370000000000000000000000" + "000000000000000000000000000000000000000000000000000083f3" + ) + bzd = BZ2Decompressor() + with self.assertRaisesRegex(OSError, "Invalid data stream"): + bzd.decompress(data) + # Previously, a second call could crash due to internal inconsistency + self.assertFalse(bzd.needs_input) + self.assertFalse(bzd.eof) + with self.assertRaisesRegex(ValueError, "previous error"): + bzd.decompress(b'\x00' * 18) + @support.refcount_test def test_refleaks_in___init__(self): gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount') diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py index 177fe45e8d97490..2e2ac147968dd4a 100644 --- a/Lib/test/test_mmap.py +++ b/Lib/test/test_mmap.py @@ -354,6 +354,8 @@ def test_find_end(self): self.assertEqual(m.find(b'one', 1, -1), 8) self.assertEqual(m.find(b'one', 1, -2), -1) self.assertEqual(m.find(bytearray(b'one')), 0) + self.assertEqual(m.find(b'', n + 1), -1) + self.assertEqual(m.rfind(b'', n + 1), -1) for i in range(-n-1, n+1): for j in range(-n-1, n+1): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-06-06-08-20-00.gh-issue-150942.Jk9pQr.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-06-08-20-00.gh-issue-150942.Jk9pQr.rst new file mode 100644 index 000000000000000..9777b8932271404 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-06-06-08-20-00.gh-issue-150942.Jk9pQr.rst @@ -0,0 +1,3 @@ +Speed up frame local variable item collection by appending result pairs to the +output list without an extra reference-count round-trip (using the internal +reference-stealing list append helper). Patch by Omkar Kabde. diff --git a/Misc/NEWS.d/next/Library/2026-05-31-12-00-00.gh-issue-150942.Re7Ref.rst b/Misc/NEWS.d/next/Library/2026-05-31-12-00-00.gh-issue-150942.Re7Ref.rst new file mode 100644 index 000000000000000..63967108b1e0b3b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-31-12-00-00.gh-issue-150942.Re7Ref.rst @@ -0,0 +1,3 @@ +Speed up :func:`re.findall`, :func:`re.sub` and :func:`re.subn` by appending +result items to the output list without an extra reference-count round-trip +(using the internal reference-stealing list append helper). diff --git a/Misc/NEWS.d/next/Library/2026-06-06-15-20-54.gh-issue-151021.J4qk2A.rst b/Misc/NEWS.d/next/Library/2026-06-06-15-20-54.gh-issue-151021.J4qk2A.rst new file mode 100644 index 000000000000000..0617fa068c844d6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-06-15-20-54.gh-issue-151021.J4qk2A.rst @@ -0,0 +1,3 @@ +Fix :meth:`mmap.mmap.find` and :meth:`~mmap.mmap.rfind` to return ``-1`` +when searching for an empty subsequence with a start position past the end +of the mapping. diff --git a/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst b/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst new file mode 100644 index 000000000000000..a37d86cf423f820 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst @@ -0,0 +1,3 @@ +Fix a possible stack buffer overflow in :mod:`bz2` when a +:class:`bz2.BZ2Decompressor` is reused after a decompression error. +The decompressor now becomes unusable after libbz2 reports an error. diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index 4cf8beed9ee3eba..9db3ac39da52099 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -108,6 +108,7 @@ typedef struct { typedef struct { PyObject_HEAD bz_stream bzs; + int bzerror; char eof; /* Py_T_BOOL expects a char */ PyObject *unused_data; char needs_input; @@ -435,8 +436,11 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length) d->bzs_avail_in_real += bzs->avail_in; - if (catch_bz2_error(bzret)) + if (catch_bz2_error(bzret)) { + d->bzerror = bzret; + FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0); goto error; + } if (bzret == BZ_STREAM_END) { FT_ATOMIC_STORE_CHAR_RELAXED(d->eof, 1); break; @@ -607,10 +611,17 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data, PyObject *result = NULL; PyMutex_Lock(&self->mutex); - if (self->eof) + if (self->eof) { PyErr_SetString(PyExc_EOFError, "End of stream already reached"); - else + } + else if (self->bzerror) { + // Re-entering BZ2_bzDecompress() after an error can write out of bounds. + PyErr_SetString(PyExc_ValueError, + "Decompressor is unusable after a previous error"); + } + else { result = decompress(self, data->buf, data->len, max_length); + } PyMutex_Unlock(&self->mutex); return result; } @@ -638,6 +649,7 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) } self->mutex = (PyMutex){0}; + self->bzerror = 0; self->needs_input = 1; self->bzs_avail_in_real = 0; self->input_buffer = NULL; diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index ee6cb4a371ea505..32aa06bed4a409c 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -42,6 +42,7 @@ static const char copyright[] = #include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION #include "pycore_dict.h" // _PyDict_Next() #include "pycore_long.h" // _PyLong_GetZero() +#include "pycore_list.h" // _PyList_AppendTakeRef() #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_tuple.h" // _PyTuple_FromPairSteal #include "pycore_unicodeobject.h" // _PyUnicode_Copy @@ -986,8 +987,7 @@ _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string, break; } - status = PyList_Append(list, item); - Py_DECREF(item); + status = _PyList_AppendTakeRef((PyListObject *)list, item); if (status < 0) goto error; @@ -1333,8 +1333,7 @@ pattern_subx(_sremodulestate* module_state, string, i, b); if (!item) goto error; - status = PyList_Append(list, item); - Py_DECREF(item); + status = _PyList_AppendTakeRef((PyListObject *)list, item); if (status < 0) goto error; @@ -1363,8 +1362,7 @@ pattern_subx(_sremodulestate* module_state, /* add to list */ if (item != Py_None) { - status = PyList_Append(list, item); - Py_DECREF(item); + status = _PyList_AppendTakeRef((PyListObject *)list, item); if (status < 0) goto error; } @@ -1381,8 +1379,7 @@ pattern_subx(_sremodulestate* module_state, string, i, state.endpos); if (!item) goto error; - status = PyList_Append(list, item); - Py_DECREF(item); + status = _PyList_AppendTakeRef((PyListObject *)list, item); if (status < 0) goto error; } diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index a30afe91f8fa171..6fb04ba7bd47c67 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -620,8 +620,6 @@ mmap_gfind_lock_held(mmap_object *self, Py_buffer *view, PyObject *start_obj, start += self->size; if (start < 0) start = 0; - else if (start > self->size) - start = self->size; if (end < 0) end += self->size; diff --git a/Objects/frameobject.c b/Objects/frameobject.c index f60cdb2dd1bf20d..b19889d3034e715 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -9,6 +9,7 @@ #include "pycore_function.h" // _PyFunction_FromConstructor() #include "pycore_genobject.h" // _PyGen_GetGeneratorFromFrame() #include "pycore_interpframe.h" // _PyFrame_GetLocalsArray() +#include "pycore_list.h" // _PyList_AppendTakeRef() #include "pycore_modsupport.h" // _PyArg_CheckPositional() #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_opcode_metadata.h" // _PyOpcode_Caches @@ -636,9 +637,7 @@ framelocalsproxy_items(PyObject *self, PyObject *Py_UNUSED(ignored)) goto error; } - int rc = PyList_Append(items, pair); - Py_DECREF(pair); - if (rc < 0) { + if (_PyList_AppendTakeRef((PyListObject *)items, pair) < 0) { goto error; } } @@ -655,9 +654,7 @@ framelocalsproxy_items(PyObject *self, PyObject *Py_UNUSED(ignored)) goto error; } - int rc = PyList_Append(items, pair); - Py_DECREF(pair); - if (rc < 0) { + if (_PyList_AppendTakeRef((PyListObject *)items, pair) < 0) { goto error; } }