Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion Lib/profiling/sampling/binary_collector.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Thin Python wrapper around C binary writer for profiling data."""

import sys
import time

import _remote_debugging
Expand Down Expand Up @@ -61,6 +62,7 @@ def __init__(self, filename, sample_interval_usec, *, skip_idle=False,
self.filename = filename
self.sample_interval_usec = sample_interval_usec
self.skip_idle = skip_idle
self.running = True

compression_type = _resolve_compression(compression)
start_time_us = int(time.monotonic() * 1_000_000)
Expand All @@ -81,7 +83,13 @@ def collect(self, stack_frames, timestamp_us=None):
"""
if timestamp_us is None:
timestamp_us = int(time.monotonic() * 1_000_000)
self._writer.write_sample(stack_frames, timestamp_us)
try:
self._writer.write_sample(stack_frames, timestamp_us)
except OverflowError as e:

@maurycy maurycy Jul 2, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pablogsal Truth be told, I'm not 100% sure what's the best layer for handling "finalizable" exceptions like OverflowError.

The promise of "finalizing the file" is in the BinaryWriter:

/*[clinic input]
_remote_debugging.BinaryWriter.__exit__
exc_type: object = None
exc_val: object = None
exc_tb: object = None
Exit context manager, finalizing the file.
[clinic start generated code]*/

This is not the case:

def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit - finalize unless there was an error."""
if exc_type is None:
self._writer.finalize()
else:
self._writer.close()

/* Only finalize on normal exit (no exception) */

Perhaps it's something as simple as weakening exc_type == Py_None, instead of doing a round-trip since the module should contain all the knowledge (ie: which exceptions are finalizable for it.)

This issue presents itself there since we've got both the unwinder and the collector in the bloc :

except (RuntimeError, UnicodeDecodeError, MemoryError, OSError):

...and the meaning of MemoryError, RuntimeError or OSError in the unwinder is non-fatal, but for the binary collector it might mean non-finalizable exception.

Of course - the very proper way to approach this is to maintain the finalizable state is in the writer itself, instead of relying on the exceptions... but that's way too much for this PR.

self.running = False
print(f"Warning: {e}; stopping early and keeping the data "
"collected so far.",
file=sys.stderr)

def collect_failed_sample(self):
"""Record a failed sample attempt (no-op for binary format)."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import unittest
from collections import defaultdict

from test.support import captured_stderr

try:
import _remote_debugging
from _remote_debugging import (
Expand Down Expand Up @@ -971,6 +973,83 @@ def test_writer_total_samples_after_close_returns_zero(self):
w.close()
self.assertEqual(w.total_samples, 0)

def test_binary_collector_stops_gracefully_on_overflow(self):
"""OverflowError from the writer stops collection via the running
protocol instead of propagating and corrupting the file.
See gh-151292."""
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
filename = f.name
self.temp_files.append(filename)

collector = BinaryCollector(filename, 1000, compression="none")
self.assertTrue(collector.running)

sample = [
make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])
]

# Collect real samples first, then hit the limit.
for i in range(3):
collector.collect(sample, timestamp_us=(i + 1) * 1000)
self.assertTrue(collector.running)

real_writer = collector._writer

class _OverflowingWriter:
def write_sample(self, stack_frames, timestamp_us):
raise OverflowError("too many samples for binary format")

collector._writer = _OverflowingWriter()
with captured_stderr() as stderr:
collector.collect(sample, timestamp_us=4000)

self.assertFalse(collector.running)
self.assertIn("too many samples", stderr.getvalue())

# The real writer can still be finalized into a valid file that
# keeps the samples collected before the limit was hit.
collector._writer = real_writer
collector.export(None)

with open(filename, "rb") as f:
header = f.read(32)
magic, version = struct.unpack_from("=II", header, 0)
self.assertEqual(magic, 0x54414348) # "TACH"
self.assertEqual(version, 1)
(sample_count,) = struct.unpack_from("=I", header, 28)
self.assertEqual(sample_count, 3)

reader_collector = RawCollector()
with BinaryReader(filename) as reader:
self.assertEqual(reader.replay_samples(reader_collector), 3)

def test_interpreter_id_overflow_rejected(self):
"""An interpreter_id wider than u32 raises OverflowError before any
writer state is mutated: subsequent valid samples are still accepted
and finalize produces a readable file."""
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
filename = f.name
self.temp_files.append(filename)

good = [
make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])
]
bad = [
make_interpreter(2**32, [make_thread(1, [make_frame("a.py", 1, "f")])])
]

writer = _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0)
writer.write_sample(good, 1000)
with self.assertRaises(OverflowError):
writer.write_sample(bad, 2000)
writer.write_sample(good, 3000)
writer.finalize()
self.assertEqual(writer.total_samples, 2)

reader_collector = RawCollector()
with BinaryReader(filename) as reader:
self.assertEqual(reader.replay_samples(reader_collector), 2)


class TestBinaryFormatValidation(BinaryFormatTestBase):
"""Tests for malformed binary files."""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix ``profiling.sampling --binary`` leaving unreadable profile files when
the ``_remote_debugging`` binary writer raises :exc:`OverflowError`. Patch
by Maurycy Pawłowski-Wieroński.
19 changes: 7 additions & 12 deletions Modules/_remote_debugging/binary_io_writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -629,11 +629,6 @@ flush_pending_rle(BinaryWriter *writer, ThreadEntry *entry)
if (!entry->has_pending_rle || entry->pending_rle_count == 0) {
return 0;
}
if (entry->pending_rle_count > UINT32_MAX - writer->total_samples) {
PyErr_SetString(PyExc_OverflowError,
"too many samples for binary format");
return -1;
}

/* Write RLE record:
* [thread_id: 8] [interpreter_id: 4] [STACK_REPEAT: 1] [count: varint]
Expand All @@ -655,7 +650,6 @@ flush_pending_rle(BinaryWriter *writer, ThreadEntry *entry)
if (writer_write_bytes(writer, &entry->pending_rle[i].status, 1) < 0) {
return -1;
}
writer->total_samples++;
}

writer->stats.repeat_records++;
Expand All @@ -678,12 +672,6 @@ write_sample_with_encoding(BinaryWriter *writer, ThreadEntry *entry,
const uint32_t *frame_indices, size_t stack_depth,
size_t shared_count, size_t pop_count, size_t push_count)
{
if (writer->total_samples == UINT32_MAX) {
PyErr_SetString(PyExc_OverflowError,
"too many samples for binary format");
return -1;
}

/* Header: thread_id(8) + interpreter_id(4) + encoding(1) + delta(varint) + status(1) */
uint8_t header_buf[SAMPLE_HEADER_MAX_SIZE];
memcpy(header_buf + SMP_OFF_THREAD_ID, &entry->thread_id, SMP_SIZE_THREAD_ID);
Expand Down Expand Up @@ -955,6 +943,12 @@ static int
process_thread_sample(BinaryWriter *writer, PyObject *thread_info,
uint32_t interpreter_id, uint64_t timestamp_us)
{
if (writer->total_samples == UINT32_MAX) {
PyErr_SetString(PyExc_OverflowError,
"too many samples for binary format");
return -1;
}

PyObject *thread_id_obj = PyStructSequence_GET_ITEM(thread_info, 0);
PyObject *status_obj = PyStructSequence_GET_ITEM(thread_info, 1);
PyObject *frame_list = PyStructSequence_GET_ITEM(thread_info, 2);
Expand Down Expand Up @@ -1010,6 +1004,7 @@ process_thread_sample(BinaryWriter *writer, PyObject *thread_info,
entry->pending_rle[entry->pending_rle_count].status = status;
entry->pending_rle_count++;
entry->has_pending_rle = 1;
writer->total_samples++;
} else {
/* Stack changed - flush any pending RLE first */
if (entry->has_pending_rle) {
Expand Down
Loading