Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix several crashes due to reading invalid memory in the new Tachyon
sampling profiler. Patch by Pablo Galindo.
6 changes: 6 additions & 0 deletions Modules/_remote_debugging/_remote_debugging.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,11 @@ typedef enum _WIN32_THREADSTATE {
#define SIZEOF_GC_RUNTIME_STATE sizeof(struct _gc_runtime_state)
#define SIZEOF_INTERPRETER_STATE sizeof(PyInterpreterState)

/* Maximum sizes for validation to prevent buffer overflows from corrupted data */
#define MAX_STACK_CHUNK_SIZE (16 * 1024 * 1024) /* 16 MB max for stack chunks */
#define MAX_LONG_DIGITS 64 /* Allows values up to ~2^1920 */
#define MAX_SET_TABLE_SIZE (1 << 20) /* 1 million entries max for set iteration */

#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
Expand Down Expand Up @@ -451,6 +456,7 @@ extern PyObject *make_frame_info(
extern bool parse_linetable(
const uintptr_t addrq,
const char* linetable,
Py_ssize_t linetable_size,
int firstlineno,
LocationInfo* info
);
Expand Down
17 changes: 13 additions & 4 deletions Modules/_remote_debugging/asyncio.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,17 @@ iterate_set_entries(
}

Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used);
Py_ssize_t set_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1;
Py_ssize_t mask = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask);
uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table);

// Validate mask and num_els to prevent huge loop iterations from garbage data
if (mask < 0 || mask >= MAX_SET_TABLE_SIZE || num_els < 0 || num_els > mask + 1) {
set_exception_cause(unwinder, PyExc_RuntimeError,
"Invalid set object (corrupted remote memory)");
return -1;
}
Py_ssize_t set_len = mask + 1;

Py_ssize_t i = 0;
Py_ssize_t els = 0;
while (i < set_len && els < num_els) {
Expand Down Expand Up @@ -812,14 +820,15 @@ append_awaited_by_for_thread(
return -1;
}

if (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) == 0) {
uintptr_t next_node = GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next);
if (next_node == 0) {
PyErr_SetString(PyExc_RuntimeError,
"Invalid linked list structure reading remote memory");
set_exception_cause(unwinder, PyExc_RuntimeError, "NULL pointer in task linked list");
return -1;
}

uintptr_t task_addr = (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next)
uintptr_t task_addr = next_node
- (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_node;

if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) {
Expand All @@ -830,7 +839,7 @@ append_awaited_by_for_thread(
// Read next node
if (_Py_RemoteDebug_PagedReadRemoteMemory(
&unwinder->handle,
(uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next),
next_node,
sizeof(task_node),
task_node) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next task node in awaited_by");
Expand Down
117 changes: 91 additions & 26 deletions Modules/_remote_debugging/code_objects.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,44 +123,74 @@ cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t
* LINE TABLE PARSING FUNCTIONS
* ============================================================================ */

// Inline helper for bounds-checked byte reading (no function call overhead)
static inline int
read_byte(const uint8_t **ptr, const uint8_t *end, uint8_t *out)
{
if (*ptr >= end) {
return -1;
}
*out = *(*ptr)++;
return 0;
}

static int
scan_varint(const uint8_t **ptr)
scan_varint(const uint8_t **ptr, const uint8_t *end)
{
unsigned int read = **ptr;
*ptr = *ptr + 1;
uint8_t read;
if (read_byte(ptr, end, &read) < 0) {
return -1;
}
unsigned int val = read & 63;
unsigned int shift = 0;
while (read & 64) {
read = **ptr;
*ptr = *ptr + 1;
if (read_byte(ptr, end, &read) < 0) {
return -1;
}
shift += 6;
// Prevent infinite loop on malformed data (shift overflow)
if (shift > 28) {
return -1;
}
val |= (read & 63) << shift;
}
return val;
return (int)val;
}

static int
scan_signed_varint(const uint8_t **ptr)
scan_signed_varint(const uint8_t **ptr, const uint8_t *end)
{
unsigned int uval = scan_varint(ptr);
int uval = scan_varint(ptr, end);
if (uval < 0) {
return INT_MIN; // Error sentinel (valid signed varints won't be INT_MIN)
}
if (uval & 1) {
return -(int)(uval >> 1);
return -(int)((unsigned int)uval >> 1);
}
else {
return uval >> 1;
return (int)((unsigned int)uval >> 1);
}
}

bool
parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info)
parse_linetable(const uintptr_t addrq, const char* linetable, Py_ssize_t linetable_size,
int firstlineno, LocationInfo* info)
{
// Reject garbage: zero or negative size
if (linetable_size <= 0) {
return false;
}

const uint8_t* ptr = (const uint8_t*)(linetable);
const uint8_t* end = ptr + linetable_size;
uintptr_t addr = 0;
int computed_line = firstlineno; // Running accumulator, separate from output
int val; // Temporary for varint results
uint8_t byte; // Temporary for byte reads
const size_t MAX_LINETABLE_ENTRIES = 65536;
size_t entry_count = 0;

while (*ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
while (ptr < end && *ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
entry_count++;
uint8_t first_byte = *(ptr++);
uint8_t code = (first_byte >> 3) & 15;
Expand All @@ -173,14 +203,34 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
info->column = info->end_column = -1;
break;
case PY_CODE_LOCATION_INFO_LONG:
computed_line += scan_signed_varint(&ptr);
val = scan_signed_varint(&ptr, end);
if (val == INT_MIN) {
return false;
}
computed_line += val;
info->lineno = computed_line;
info->end_lineno = computed_line + scan_varint(&ptr);
info->column = scan_varint(&ptr) - 1;
info->end_column = scan_varint(&ptr) - 1;
val = scan_varint(&ptr, end);
if (val < 0) {
return false;
}
info->end_lineno = computed_line + val;
val = scan_varint(&ptr, end);
if (val < 0) {
return false;
}
info->column = val - 1;
val = scan_varint(&ptr, end);
if (val < 0) {
return false;
}
info->end_column = val - 1;
break;
case PY_CODE_LOCATION_INFO_NO_COLUMNS:
computed_line += scan_signed_varint(&ptr);
val = scan_signed_varint(&ptr, end);
if (val == INT_MIN) {
return false;
}
computed_line += val;
info->lineno = info->end_lineno = computed_line;
info->column = info->end_column = -1;
break;
Expand All @@ -189,17 +239,25 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
case PY_CODE_LOCATION_INFO_ONE_LINE2:
computed_line += code - 10;
info->lineno = info->end_lineno = computed_line;
info->column = *(ptr++);
info->end_column = *(ptr++);
if (read_byte(&ptr, end, &byte) < 0) {
return false;
}
info->column = byte;
if (read_byte(&ptr, end, &byte) < 0) {
return false;
}
info->end_column = byte;
break;
default: {
uint8_t second_byte = *(ptr++);
if ((second_byte & 128) != 0) {
if (read_byte(&ptr, end, &byte) < 0) {
return false;
}
if ((byte & 128) != 0) {
return false;
}
info->lineno = info->end_lineno = computed_line;
info->column = code << 3 | (second_byte >> 4);
info->end_column = info->column + (second_byte & 15);
info->column = code << 3 | (byte >> 4);
info->end_column = info->column + (byte & 15);
break;
}
}
Expand Down Expand Up @@ -384,8 +442,14 @@ parse_code_object(RemoteUnwinderObject *unwinder,
tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation);
}

if (tlbc_entry && ctx->tlbc_index < tlbc_entry->tlbc_array_size) {
assert(ctx->tlbc_index >= 0);
// Validate tlbc_index and check TLBC cache
if (tlbc_entry) {
// Validate index bounds (also catches negative values since tlbc_index is signed)
if (ctx->tlbc_index < 0 || ctx->tlbc_index >= tlbc_entry->tlbc_array_size) {
set_exception_cause(unwinder, PyExc_RuntimeError,
"Invalid tlbc_index (corrupted remote memory)");
goto error;
}
assert(tlbc_entry->tlbc_array_size > 0);
// Use cached TLBC data
uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t));
Expand All @@ -398,7 +462,7 @@ parse_code_object(RemoteUnwinderObject *unwinder,
}
}

// Fall back to main bytecode
// Fall back to main bytecode (no tlbc_entry or tlbc_bytecode_addr was 0)
addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;

done_tlbc:
Expand All @@ -409,6 +473,7 @@ parse_code_object(RemoteUnwinderObject *unwinder,
; // Empty statement to avoid C23 extension warning
LocationInfo info = {0};
bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable),
PyBytes_GET_SIZE(meta->linetable),
meta->first_lineno, &info);
if (!ok) {
info.lineno = -1;
Expand Down
15 changes: 14 additions & 1 deletion Modules/_remote_debugging/frames.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ process_single_stack_chunk(
// Check actual size and reread if necessary
size_t actual_size = GET_MEMBER(size_t, this_chunk, offsetof(_PyStackChunk, size));
if (actual_size != current_size) {
// Validate size: reject garbage (too small or unreasonably large)
// Size must be at least enough for the header and reasonably bounded
if (actual_size <= offsetof(_PyStackChunk, data) || actual_size > MAX_STACK_CHUNK_SIZE) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is looser than I'd like for _PyStackChunk, but whatever.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have any suggestion?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wish we had a tighter bound, but I'm not sure what would be appropiate for a real world stack frame? So that's why I said whatever :)

Copy link
Member Author

@pablogsal pablogsal Dec 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wish we had a tighter bound, but I'm not sure what would be appropiate for a real world stack frame?

But this is for the entire chunk no? Chunks will grow from 16Kb to whatever, we just need to ensure we don't copy too much because we just read a garbage size to copy. Chunks here are just an optimization so if we fail to read the chunks we will fallback to read frame-by-frame (which sucks but it works).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I didn't know you do the second part (fallback to read one by one). I think that's fine then.

PyMem_RawFree(this_chunk);
set_exception_cause(unwinder, PyExc_RuntimeError,
"Invalid stack chunk size (corrupted remote memory)");
return -1;
}

this_chunk = PyMem_RawRealloc(this_chunk, actual_size);
if (!this_chunk) {
PyErr_NoMemory();
Expand Down Expand Up @@ -129,7 +138,11 @@ void *
find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr)
{
for (size_t i = 0; i < chunks->count; ++i) {
assert(chunks->chunks[i].size > offsetof(_PyStackChunk, data));
// Validate size: reject garbage that would cause underflow
if (chunks->chunks[i].size <= offsetof(_PyStackChunk, data)) {
// Skip this chunk - corrupted size from remote memory
continue;
}
uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data);
size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data);

Expand Down
9 changes: 9 additions & 0 deletions Modules/_remote_debugging/module.c
Original file line number Diff line number Diff line change
Expand Up @@ -584,13 +584,22 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
}

while (current_tstate != 0) {
uintptr_t prev_tstate = current_tstate;
PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate,
gil_holder_tstate,
gc_frame);
if (!frame_info) {
// Check if this was an intentional skip due to mode-based filtering
if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL ||
self->mode == PROFILING_MODE_EXCEPTION) && !PyErr_Occurred()) {
// Detect cycle: if current_tstate didn't advance, we have corrupted data
if (current_tstate == prev_tstate) {
Py_DECREF(interpreter_threads);
set_exception_cause(self, PyExc_RuntimeError,
"Thread list cycle detected (corrupted remote memory)");
Py_CLEAR(result);
goto exit;
}
// Thread was skipped due to mode filtering, continue to next thread
continue;
}
Expand Down
18 changes: 15 additions & 3 deletions Modules/_remote_debugging/object_reading.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,17 +194,29 @@ read_py_long(
return 0;
}

// If the long object has inline digits, use them directly
// Validate size: reject garbage (negative or unreasonably large)
if (size < 0 || size > MAX_LONG_DIGITS) {
set_exception_cause(unwinder, PyExc_RuntimeError,
"Invalid PyLong size (corrupted remote memory)");
return -1;
}

// Calculate how many digits fit inline in our local buffer
Py_ssize_t ob_digit_offset = unwinder->debug_offsets.long_object.ob_digit;
Py_ssize_t inline_digits_space = SIZEOF_LONG_OBJ - ob_digit_offset;
Py_ssize_t max_inline_digits = inline_digits_space / (Py_ssize_t)sizeof(digit);

// If the long object has inline digits that fit in our buffer, use them directly
digit *digits;
if (size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) {
if (size <= max_inline_digits && size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) {
// For small integers, digits are inline in the long_value.ob_digit array
digits = (digit *)PyMem_RawMalloc(size * sizeof(digit));
if (!digits) {
PyErr_NoMemory();
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for small PyLong");
return -1;
}
memcpy(digits, long_obj + unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit));
memcpy(digits, long_obj + ob_digit_offset, size * sizeof(digit));
} else {
// For larger integers, we need to read the digits separately
digits = (digit *)PyMem_RawMalloc(size * sizeof(digit));
Expand Down
Loading