diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 7c2e0e95a80c3f..f356d60ae5c7a7 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -534,7 +534,21 @@ typedef struct { uint8_t count; uint8_t indices[MAX_RECORDED_VALUES]; } _PyOpcodeRecordEntry; + +typedef struct { + uint8_t count; + uint8_t transform_mask; + uint8_t slots[MAX_RECORDED_VALUES]; +} _PyOpcodeRecordSlotMap; + PyAPI_DATA(const _PyOpcodeRecordEntry) _PyOpcode_RecordEntries[256]; +PyAPI_DATA(const _PyOpcodeRecordSlotMap) _PyOpcode_RecordSlotMaps[256]; + +/* Convert a family-recorded value to the form a recorder uop expects. + * If no transform is needed, return the input value unchanged. + * Takes ownership of `value` and returns a new strong reference or NULL. + */ +PyAPI_FUNC(PyObject *) _PyOpcode_RecordTransformValue(int uop, PyObject *value); #endif #ifdef __cplusplus diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 39075fc64cf02b..b37c35495983c3 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -5849,6 +5849,19 @@ def testfunc(n): self.assertNotIn("_LOAD_SUPER_ATTR_METHOD", uops) self.assertEqual(uops.count("_GUARD_NOS_TYPE_VERSION"), 2) + def test_settrace_then_polymorphic_call_does_not_crash(self): + script_helper.assert_python_ok("-c", textwrap.dedent(""" + import sys + sys.settrace(lambda *_: None) + sys.settrace(None) + + class C: + def __init__(self, x): + pass + + for i in 0, 1, 0, 1: + C(0) if i else str(0) + """)) def global_identity(x): return x diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 62cf0c0c6af0b2..748309b54593a1 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -2074,19 +2074,33 @@ def tearDown(self) -> None: pass super().tearDown() - def generate_tables(self, input: str) -> str: - import io + def analyze_input(self, input: str): with open(self.temp_input_filename, "w+") as f: f.write(parser.BEGIN_MARKER) f.write(input) f.write(parser.END_MARKER) with handle_stderr(): - analysis = analyze_files([self.temp_input_filename]) + return analyze_files([self.temp_input_filename]) + + def generate_tables(self, input: str) -> str: + import io + analysis = self.analyze_input(input) buf = io.StringIO() out = CWriter(buf, 0, False) record_function_generator.generate_recorder_tables(analysis, out) return buf.getvalue() + def get_slot_map_section(self, output: str) -> str: + return output.split( + "const _PyOpcodeRecordSlotMap _PyOpcode_RecordSlotMaps[256] = {\n", + 1, + )[1].split("};\n\n", 1)[0] + + def assert_slot_map_lines(self, output: str, *lines: str) -> None: + slot_map_section = self.get_slot_map_section(output) + for line in lines: + self.assertIn(line, slot_map_section) + def test_single_recording_uop_generates_count(self): input = """ tier2 op(_RECORD_TOS, (value -- value)) { @@ -2145,6 +2159,173 @@ def test_four_recording_uops_rejected(self): with self.assertRaisesRegex(ValueError, "exceeds MAX_RECORDED_VALUES"): self.generate_tables(input) + def test_family_member_needs_transform_only_when_shape_changes(self): + input = """ + tier2 op(_RECORD_TOS, (value -- value)) { + RECORD_VALUE(value); + } + tier2 op(_RECORD_TOS_TYPE, (value -- value)) { + RECORD_VALUE(Py_TYPE(value)); + } + op(_DO_STUFF, (value -- res)) { + res = value; + } + macro(OP_RAW) = _RECORD_TOS + _DO_STUFF; + macro(OP_RAW_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF; + family(OP_RAW, INLINE_CACHE_ENTRIES_OP_RAW) = { OP_RAW_SPECIALIZED }; + + macro(OP_TYPED) = _RECORD_TOS_TYPE + _DO_STUFF; + macro(OP_TYPED_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF; + family(OP_TYPED, INLINE_CACHE_ENTRIES_OP_TYPED) = { OP_TYPED_SPECIALIZED }; + """ + output = self.generate_tables(input) + self.assert_slot_map_lines( + output, + "[OP_RAW] = {1, 1, {0}}", + "[OP_RAW_SPECIALIZED] = {1, 0, {0}}", + "[OP_TYPED] = {1, 0, {0}}", + "[OP_TYPED_SPECIALIZED] = {1, 0, {0}}", + ) + + def test_family_member_maps_positional_recorders_to_family_slots(self): + input = """ + tier2 op(_RECORD_TOS, (sub -- sub)) { + RECORD_VALUE(sub); + } + tier2 op(_RECORD_NOS, (container, sub -- container, sub)) { + RECORD_VALUE(container); + } + op(_DO_STUFF, (container, sub -- res)) { + res = container; + } + macro(OP) = _RECORD_TOS + _RECORD_NOS + _DO_STUFF; + macro(OP_SPECIALIZED) = _RECORD_NOS + _DO_STUFF; + family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED }; + """ + output = self.generate_tables(input) + self.assert_slot_map_lines( + output, + "[OP] = {2, 0, {1, 0}}", + "[OP_SPECIALIZED] = {1, 0, {0}}", + ) + + def test_family_member_maps_non_positional_recorders_by_stack_shape(self): + input = """ + tier2 op(_RECORD_CALLABLE, (callable, self, args[oparg] -- callable, self, args[oparg])) { + RECORD_VALUE(callable); + } + tier2 op(_RECORD_BOUND_METHOD, (callable, self, args[oparg] -- callable, self, args[oparg])) { + RECORD_VALUE(callable); + } + op(_DO_STUFF, (callable, self, args[oparg] -- res)) { + res = callable; + } + macro(OP) = _RECORD_CALLABLE + _DO_STUFF; + macro(OP_SPECIALIZED) = _RECORD_BOUND_METHOD + _DO_STUFF; + family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED }; + """ + output = self.generate_tables(input) + self.assert_slot_map_lines( + output, + "[OP] = {1, 1, {0}}", + "[OP_SPECIALIZED] = {1, 0, {0}}", + ) + + def test_family_head_records_union_of_member_recorders(self): + input = """ + tier2 op(_RECORD_TOS, (value -- value)) { + RECORD_VALUE(value); + } + op(_DO_STUFF, (value -- res)) { + res = value; + } + macro(OP) = _DO_STUFF; + macro(OP_SPECIALIZED) = _RECORD_TOS + _DO_STUFF; + family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED }; + """ + output = self.generate_tables(input) + self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output) + self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output) + self.assert_slot_map_lines(output, "[OP_SPECIALIZED] = {1, 0, {0}}") + + def test_family_detects_base_and_specialized_recording_difference(self): + input = """ + tier2 op(_RECORD_TOS, (value -- value)) { + RECORD_VALUE(value); + } + tier2 op(_RECORD_TOS_TYPE, (value -- value)) { + RECORD_VALUE(Py_TYPE(value)); + } + op(_DO_STUFF, (value -- res)) { + res = value; + } + macro(OP) = _RECORD_TOS + _DO_STUFF; + macro(OP_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF; + family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED }; + """ + analysis = self.analyze_input(input) + output = self.generate_tables(input) + self.assertEqual( + record_function_generator.get_instruction_record_names( + analysis.instructions["OP"] + ), + ["_RECORD_TOS"], + ) + self.assertEqual( + record_function_generator.get_instruction_record_names( + analysis.instructions["OP_SPECIALIZED"] + ), + ["_RECORD_TOS_TYPE"], + ) + self.assertIn("[OP] = {1, {_RECORD_TOS_TYPE_INDEX}}", output) + self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_TYPE_INDEX}}", output) + self.assert_slot_map_lines( + output, + "[OP] = {1, 1, {0}}", + "[OP_SPECIALIZED] = {1, 0, {0}}", + ) + + def test_family_head_falls_back_for_missing_member_slots(self): + input = """ + tier2 op(_RECORD_TOS, (value -- value)) { + RECORD_VALUE(value); + } + op(_DO_STUFF, (value -- res)) { + res = value; + } + macro(OP) = _RECORD_TOS + _DO_STUFF; + macro(OP_SPECIALIZED) = _DO_STUFF; + family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED }; + """ + output = self.generate_tables(input) + self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output) + self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output) + + def test_family_mixed_slots_only_transform_changed_recorders(self): + input = """ + tier2 op(_RECORD_TOS_TYPE, (left, right -- left, right)) { + RECORD_VALUE(Py_TYPE(right)); + } + tier2 op(_RECORD_NOS_TYPE, (left, right -- left, right)) { + RECORD_VALUE(Py_TYPE(left)); + } + tier2 op(_RECORD_NOS, (left, right -- left, right)) { + RECORD_VALUE(left); + } + op(_DO_STUFF, (left, right -- res)) { + res = left; + } + macro(OP) = _RECORD_TOS_TYPE + _RECORD_NOS_TYPE + _DO_STUFF; + macro(OP_SPECIALIZED) = _RECORD_NOS + _DO_STUFF; + family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED }; + """ + output = self.generate_tables(input) + self.assertIn("[OP] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}", output) + self.assert_slot_map_lines( + output, + "[OP] = {2, 2, {1, 0}}", + "[OP_SPECIALIZED] = {1, 0, {0}}", + ) class TestGeneratedAbstractCases(unittest.TestCase): def setUp(self) -> None: diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-18-16-41-04.gh-issue-148571.Q6WB3A.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-18-16-41-04.gh-issue-148571.Q6WB3A.rst new file mode 100644 index 00000000000000..70eeada34320ac --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-18-16-41-04.gh-issue-148571.Q6WB3A.rst @@ -0,0 +1 @@ +Fix a crash in the JIT optimizer when specialized opcode families inherited incompatible recorded operand layouts. diff --git a/Python/optimizer.c b/Python/optimizer.c index a389c0f4072817..92b62d529fc909 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -658,6 +658,44 @@ is_terminator(const _PyUOpInstruction *uop) ); } +static PyObject * +record_trace_transform_to_type(PyObject *value) +{ + PyObject *tp = Py_NewRef((PyObject *)Py_TYPE(value)); + Py_DECREF(value); + return tp; +} + +/* _RECORD_NOS_GEN_FUNC and _RECORD_3OS_GEN_FUNC record the raw receiver. + * If it is a generator, return its function object; otherwise return NULL. + */ +static PyObject * +record_trace_transform_gen_func(PyObject *value) +{ + PyObject *func = NULL; + if (PyGen_Check(value)) { + _PyStackRef f = ((PyGenObject *)value)->gi_iframe.f_funcobj; + if (!PyStackRef_IsNull(f)) { + func = Py_NewRef(PyStackRef_AsPyObjectBorrow(f)); + } + } + Py_DECREF(value); + return func; +} + +/* _RECORD_BOUND_METHOD records the raw callable. + * Keep it only for bound methods; otherwise return NULL. + */ +static PyObject * +record_trace_transform_bound_method(PyObject *value) +{ + if (Py_TYPE(value) == &PyMethod_Type) { + return value; + } + Py_DECREF(value); + return NULL; +} + /* Returns 1 on success (added to trace), 0 on trace end. */ // gh-142543: inlining this function causes stack overflows @@ -831,6 +869,8 @@ _PyJit_translate_single_bytecode_to_trace( // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT trace->end -= 2; + const _PyOpcodeRecordSlotMap *record_slot_map = &_PyOpcode_RecordSlotMaps[opcode]; + assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG); assert(!_PyErr_Occurred(tstate)); @@ -1027,8 +1067,15 @@ _PyJit_translate_single_bytecode_to_trace( } } else if (_PyUop_Flags[uop] & HAS_RECORDS_VALUE_FLAG) { - PyObject *recorded_value = tracer->prev_state.recorded_values[record_idx]; - tracer->prev_state.recorded_values[record_idx] = NULL; + assert(record_idx < record_slot_map->count); + uint8_t record_slot = record_slot_map->slots[record_idx]; + assert(record_slot < tracer->prev_state.recorded_count); + PyObject *recorded_value = tracer->prev_state.recorded_values[record_slot]; + tracer->prev_state.recorded_values[record_slot] = NULL; + if ((record_slot_map->transform_mask & (1u << record_idx)) && + recorded_value != NULL) { + recorded_value = _PyOpcode_RecordTransformValue(uop, recorded_value); + } record_idx++; operand = (uintptr_t)recorded_value; } diff --git a/Python/record_functions.c.h b/Python/record_functions.c.h index dff13bfb45e5b0..504f6e1d9901c3 100644 --- a/Python/record_functions.c.h +++ b/Python/record_functions.c.h @@ -103,19 +103,45 @@ void _PyOpcode_RecordFunction_CODE(_PyInterpreterFrame *frame, _PyStackRef *stac #define _RECORD_3OS_GEN_FUNC_INDEX 3 #define _RECORD_NOS_GEN_FUNC_INDEX 4 #define _RECORD_CALLABLE_INDEX 5 -#define _RECORD_BOUND_METHOD_INDEX 6 -#define _RECORD_CALLABLE_KW_INDEX 7 -#define _RECORD_4OS_INDEX 8 -#define _RECORD_NOS_TYPE_INDEX 9 +#define _RECORD_CALLABLE_KW_INDEX 6 +#define _RECORD_4OS_INDEX 7 const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = { + [TO_BOOL_BOOL] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [TO_BOOL_NONE] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [LOAD_SUPER_ATTR_ATTR] = {1, {_RECORD_NOS_INDEX}}, + [TO_BOOL] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [TO_BOOL_INT] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [TO_BOOL_LIST] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [TO_BOOL_STR] = {1, {_RECORD_TOS_TYPE_INDEX}}, [TO_BOOL_ALWAYS_TRUE] = {1, {_RECORD_TOS_TYPE_INDEX}}, - [BINARY_OP_SUBSCR_GETITEM] = {1, {_RECORD_NOS_INDEX}}, + [BINARY_OP_MULTIPLY_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_ADD_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_SUBTRACT_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_MULTIPLY_FLOAT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_ADD_FLOAT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_SUBTRACT_FLOAT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_ADD_UNICODE] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_EXTEND] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_INPLACE_ADD_UNICODE] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_SUBSCR_LIST_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_SUBSCR_LIST_SLICE] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_SUBSCR_STR_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_SUBSCR_USTR_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_SUBSCR_TUPLE_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_SUBSCR_DICT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_SUBSCR_GETITEM] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, + [SEND] = {1, {_RECORD_3OS_GEN_FUNC_INDEX}}, [SEND_GEN] = {1, {_RECORD_3OS_GEN_FUNC_INDEX}}, + [STORE_ATTR] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [LOAD_SUPER_ATTR] = {1, {_RECORD_NOS_INDEX}}, [LOAD_SUPER_ATTR_METHOD] = {1, {_RECORD_NOS_INDEX}}, + [LOAD_ATTR] = {1, {_RECORD_TOS_TYPE_INDEX}}, [LOAD_ATTR_INSTANCE_VALUE] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [LOAD_ATTR_MODULE] = {1, {_RECORD_TOS_TYPE_INDEX}}, [LOAD_ATTR_WITH_HINT] = {1, {_RECORD_TOS_TYPE_INDEX}}, [LOAD_ATTR_SLOT] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [LOAD_ATTR_CLASS] = {1, {_RECORD_TOS_TYPE_INDEX}}, [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = {1, {_RECORD_TOS_TYPE_INDEX}}, [LOAD_ATTR_PROPERTY] = {1, {_RECORD_TOS_TYPE_INDEX}}, [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = {1, {_RECORD_TOS_TYPE_INDEX}}, @@ -125,6 +151,11 @@ const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = { [GET_ITER] = {1, {_RECORD_TOS_TYPE_INDEX}}, [GET_ITER_SELF] = {1, {_RECORD_TOS_TYPE_INDEX}}, [GET_ITER_VIRTUAL] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [FOR_ITER] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}}, + [FOR_ITER_VIRTUAL] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}}, + [FOR_ITER_LIST] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}}, + [FOR_ITER_TUPLE] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}}, + [FOR_ITER_RANGE] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}}, [FOR_ITER_GEN] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}}, [LOAD_SPECIAL] = {1, {_RECORD_TOS_TYPE_INDEX}}, [LOAD_ATTR_METHOD_WITH_VALUES] = {1, {_RECORD_TOS_TYPE_INDEX}}, @@ -132,34 +163,104 @@ const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = { [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = {1, {_RECORD_TOS_TYPE_INDEX}}, [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}}, [LOAD_ATTR_METHOD_LAZY_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [CALL] = {1, {_RECORD_CALLABLE_INDEX}}, [CALL_PY_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}}, - [CALL_BOUND_METHOD_GENERAL] = {1, {_RECORD_BOUND_METHOD_INDEX}}, + [CALL_BOUND_METHOD_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}}, [CALL_NON_PY_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}}, - [CALL_BOUND_METHOD_EXACT_ARGS] = {1, {_RECORD_BOUND_METHOD_INDEX}}, + [CALL_BOUND_METHOD_EXACT_ARGS] = {1, {_RECORD_CALLABLE_INDEX}}, [CALL_PY_EXACT_ARGS] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_TYPE_1] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_STR_1] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_TUPLE_1] = {1, {_RECORD_CALLABLE_INDEX}}, [CALL_ALLOC_AND_ENTER_INIT] = {1, {_RECORD_CALLABLE_INDEX}}, [CALL_BUILTIN_CLASS] = {1, {_RECORD_CALLABLE_INDEX}}, [CALL_BUILTIN_O] = {1, {_RECORD_CALLABLE_INDEX}}, [CALL_BUILTIN_FAST] = {1, {_RECORD_CALLABLE_INDEX}}, [CALL_BUILTIN_FAST_WITH_KEYWORDS] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_LEN] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_ISINSTANCE] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_LIST_APPEND] = {1, {_RECORD_CALLABLE_INDEX}}, [CALL_METHOD_DESCRIPTOR_O] = {1, {_RECORD_CALLABLE_INDEX}}, [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = {1, {_RECORD_CALLABLE_INDEX}}, [CALL_METHOD_DESCRIPTOR_NOARGS] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_METHOD_DESCRIPTOR_FAST] = {1, {_RECORD_CALLABLE_INDEX}}, [CALL_KW_PY] = {1, {_RECORD_CALLABLE_KW_INDEX}}, [CALL_KW_BOUND_METHOD] = {1, {_RECORD_CALLABLE_KW_INDEX}}, + [CALL_KW] = {1, {_RECORD_CALLABLE_KW_INDEX}}, + [CALL_KW_NON_PY] = {1, {_RECORD_CALLABLE_KW_INDEX}}, + [CALL_FUNCTION_EX] = {1, {_RECORD_4OS_INDEX}}, [CALL_EX_PY] = {1, {_RECORD_4OS_INDEX}}, - [BINARY_OP] = {2, {_RECORD_TOS_TYPE_INDEX, _RECORD_NOS_TYPE_INDEX}}, + [CALL_EX_NON_PY_GENERAL] = {1, {_RECORD_4OS_INDEX}}, + [BINARY_OP] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}, }; -const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[10] = { +const _PyOpcodeRecordSlotMap _PyOpcode_RecordSlotMaps[256] = { + [TO_BOOL_ALWAYS_TRUE] = {1, 0, {0}}, + [BINARY_OP_SUBSCR_GETITEM] = {1, 0, {0}}, + [SEND_GEN] = {1, 0, {0}}, + [LOAD_SUPER_ATTR_METHOD] = {1, 0, {0}}, + [LOAD_ATTR_INSTANCE_VALUE] = {1, 0, {0}}, + [LOAD_ATTR_WITH_HINT] = {1, 0, {0}}, + [LOAD_ATTR_SLOT] = {1, 0, {0}}, + [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = {1, 0, {0}}, + [LOAD_ATTR_PROPERTY] = {1, 0, {0}}, + [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = {1, 0, {0}}, + [STORE_ATTR_INSTANCE_VALUE] = {1, 0, {0}}, + [STORE_ATTR_WITH_HINT] = {1, 0, {0}}, + [STORE_ATTR_SLOT] = {1, 0, {0}}, + [GET_ITER] = {1, 0, {0}}, + [GET_ITER_SELF] = {1, 0, {0}}, + [GET_ITER_VIRTUAL] = {1, 0, {0}}, + [FOR_ITER_GEN] = {1, 0, {0}}, + [LOAD_SPECIAL] = {1, 0, {0}}, + [LOAD_ATTR_METHOD_WITH_VALUES] = {1, 0, {0}}, + [LOAD_ATTR_METHOD_NO_DICT] = {1, 0, {0}}, + [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = {1, 0, {0}}, + [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = {1, 0, {0}}, + [LOAD_ATTR_METHOD_LAZY_DICT] = {1, 0, {0}}, + [CALL_PY_GENERAL] = {1, 0, {0}}, + [CALL_BOUND_METHOD_GENERAL] = {1, 1, {0}}, + [CALL_NON_PY_GENERAL] = {1, 0, {0}}, + [CALL_BOUND_METHOD_EXACT_ARGS] = {1, 1, {0}}, + [CALL_PY_EXACT_ARGS] = {1, 0, {0}}, + [CALL_ALLOC_AND_ENTER_INIT] = {1, 0, {0}}, + [CALL_BUILTIN_CLASS] = {1, 0, {0}}, + [CALL_BUILTIN_O] = {1, 0, {0}}, + [CALL_BUILTIN_FAST] = {1, 0, {0}}, + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = {1, 0, {0}}, + [CALL_METHOD_DESCRIPTOR_O] = {1, 0, {0}}, + [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = {1, 0, {0}}, + [CALL_METHOD_DESCRIPTOR_NOARGS] = {1, 0, {0}}, + [CALL_KW_PY] = {1, 0, {0}}, + [CALL_KW_BOUND_METHOD] = {1, 0, {0}}, + [CALL_EX_PY] = {1, 0, {0}}, + [BINARY_OP] = {2, 2, {1, 0}}, +}; + +const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[8] = { [0] = NULL, [_RECORD_TOS_TYPE_INDEX] = _PyOpcode_RecordFunction_TOS_TYPE, [_RECORD_NOS_INDEX] = _PyOpcode_RecordFunction_NOS, [_RECORD_3OS_GEN_FUNC_INDEX] = _PyOpcode_RecordFunction_3OS_GEN_FUNC, [_RECORD_NOS_GEN_FUNC_INDEX] = _PyOpcode_RecordFunction_NOS_GEN_FUNC, [_RECORD_CALLABLE_INDEX] = _PyOpcode_RecordFunction_CALLABLE, - [_RECORD_BOUND_METHOD_INDEX] = _PyOpcode_RecordFunction_BOUND_METHOD, [_RECORD_CALLABLE_KW_INDEX] = _PyOpcode_RecordFunction_CALLABLE_KW, [_RECORD_4OS_INDEX] = _PyOpcode_RecordFunction_4OS, - [_RECORD_NOS_TYPE_INDEX] = _PyOpcode_RecordFunction_NOS_TYPE, }; + +PyObject * +_PyOpcode_RecordTransformValue(int uop, PyObject *value) +{ + switch (uop) { + case _RECORD_TOS_TYPE: + case _RECORD_NOS_TYPE: + return record_trace_transform_to_type(value); + case _RECORD_NOS_GEN_FUNC: + case _RECORD_3OS_GEN_FUNC: + return record_trace_transform_gen_func(value); + case _RECORD_BOUND_METHOD: + return record_trace_transform_bound_method(value); + default: + return value; + } +} diff --git a/Tools/cases_generator/record_function_generator.py b/Tools/cases_generator/record_function_generator.py index d7ae0ebf79fe62..6f518ffdcf2ac2 100644 --- a/Tools/cases_generator/record_function_generator.py +++ b/Tools/cases_generator/record_function_generator.py @@ -28,6 +28,21 @@ # Must match MAX_RECORDED_VALUES in Include/internal/pycore_optimizer.h. MAX_RECORDED_VALUES = 3 +# Map `_RECORD_*` uops to the helper that converts a raw family-recorded +# value to the form the specialized member consumes. +_RECORD_TRANSFORM_HELPERS: dict[str, str] = { + "_RECORD_TOS_TYPE": "record_trace_transform_to_type", + "_RECORD_NOS_TYPE": "record_trace_transform_to_type", + "_RECORD_NOS_GEN_FUNC": "record_trace_transform_gen_func", + "_RECORD_3OS_GEN_FUNC": "record_trace_transform_gen_func", + "_RECORD_BOUND_METHOD": "record_trace_transform_bound_method", +} + +# Recorder uops whose slot kind differs from the leading word of their name. +_RECORD_SLOT_KIND_OVERRIDES: dict[str, str] = { + "_RECORD_BOUND_METHOD": "CALLABLE", +} + class RecorderEmitter(Emitter): def __init__(self, out: CWriter): @@ -52,9 +67,83 @@ def record_value( return True +def get_record_slot_kind(record_name: str) -> str: + if record_name in _RECORD_SLOT_KIND_OVERRIDES: + return _RECORD_SLOT_KIND_OVERRIDES[record_name] + if not record_name.startswith("_RECORD_"): + return record_name + return record_name.removeprefix("_RECORD_").partition("_")[0] + + +def get_instruction_record_names(inst: Instruction) -> list[str]: + return [part.name for part in inst.parts if part.properties.records_value] + + +def get_family_record_names( + family_head: Instruction, + family_members: list[Instruction], + instruction_records: dict[str, list[str]], + record_slot_keys: dict[str, str], +) -> list[str]: + member_records = [instruction_records[m.name] for m in family_members] + all_member_names = {n for names in member_records for n in names} + records: list[str] = [] + slot_index: dict[str, int] = {} + + def add(name: str) -> None: + kind = record_slot_keys[name] + # Prefer the raw recorder if any member uses it; otherwise the given form. + raw = f"_RECORD_{kind}" + source = raw if raw in all_member_names else name + existing = slot_index.get(kind) + if existing is None: + slot_index[kind] = len(records) + records.append(source) + elif records[existing] != source: + raise ValueError( + f"Family {family_head.name} has incompatible recorders for " + f"slot {kind}: {records[existing]} and {source}" + ) + + for names in member_records: + for name in names: + add(name) + # Family head supplies any slots no member exercises. + for name in instruction_records[family_head.name]: + if record_slot_keys[name] not in slot_index: + slot_index[record_slot_keys[name]] = len(records) + records.append(name) + return records + + +def get_record_consumer_layout( + inst_name: str, + source_records: list[str], + own_records: list[str], + record_slot_keys: dict[str, str], +) -> tuple[list[int], int]: + used = [False] * len(source_records) + slot_map: list[int] = [] + transform_mask = 0 + for i, own in enumerate(own_records): + own_kind = record_slot_keys[own] + for j, src in enumerate(source_records): + if not used[j] and record_slot_keys[src] == own_kind: + used[j] = True + slot_map.append(j) + if src != own: + transform_mask |= 1 << i + break + else: + raise ValueError( + f"Instruction {inst_name} has no compatible family slot for " + f"{own} in {source_records}" + ) + return slot_map, transform_mask + def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: CWriter) -> None: - write_header(__file__, filenames, outfile) - outfile.write( + write_header(__file__, filenames, out.out) + out.out.write( """ #ifdef TIER_ONE #error "This file is for Tier 2 only" @@ -63,13 +152,10 @@ def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: C ) args = "_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, int oparg, PyObject **recorded_value" emitter = RecorderEmitter(out) - func_count = 0 nop = analysis.instructions["NOP"] - function_table: dict[str, int] = dict() - for name, uop in analysis.uops.items(): + for uop in analysis.uops.values(): if not uop.properties.records_value: continue - func_count += 1 out.emit(f"void _PyOpcode_RecordFunction{uop.name[7:]}({args}) {{\n") seen = {"unused"} for var in uop.stack.inputs: @@ -83,42 +169,109 @@ def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: C out.emit("\n\n") def generate_recorder_tables(analysis: Analysis, out: CWriter) -> None: - record_function_indexes: dict[str, int] = dict() + instruction_records = { + inst.name: get_instruction_record_names(inst) + for inst in analysis.instructions.values() + } + record_uop_names = [ + name for name, uop in analysis.uops.items() if uop.properties.records_value + ] + record_slot_keys = {name: get_record_slot_kind(name) for name in record_uop_names} + family_record_table = { + family.name: get_family_record_names( + analysis.instructions[family.name], + family.members, + instruction_records, + record_slot_keys, + ) + for family in analysis.families.values() + } + record_table: dict[str, list[str]] = {} - index = 1 + record_consumer_table: dict[str, tuple[list[int], int]] = {} + record_function_indexes: dict[str, int] = {} for inst in analysis.instructions.values(): - if not inst.properties.records_value: + own_records = instruction_records[inst.name] + # TRACE_RECORD runs before execution, but specialization may rewrite + # the opcode before translation. Record the raw family shape (union + # of head + members) so any opcode in the family can be translated + # from the same recorded layout. + family = inst.family or analysis.families.get(inst.name) + records = family_record_table[family.name] if family is not None else own_records + if not records: continue - records: list[str] = [] - for part in inst.parts: - if not part.properties.records_value: - continue - if part.name not in record_function_indexes: - record_function_indexes[part.name] = index - index += 1 - records.append(part.name) - if records: - if len(records) > MAX_RECORDED_VALUES: - raise ValueError( - f"Instruction {inst.name} has {len(records)} recording ops, " - f"exceeds MAX_RECORDED_VALUES ({MAX_RECORDED_VALUES})" - ) - record_table[inst.name] = records - func_count = len(record_function_indexes) + if len(records) > MAX_RECORDED_VALUES: + raise ValueError( + f"Instruction {inst.name} has {len(records)} recording ops, " + f"exceeds MAX_RECORDED_VALUES ({MAX_RECORDED_VALUES})" + ) + record_table[inst.name] = records + for name in records: + if name not in record_function_indexes: + record_function_indexes[name] = len(record_function_indexes) + 1 + if own_records: + record_consumer_table[inst.name] = get_record_consumer_layout( + inst.name, records, own_records, record_slot_keys + ) for name, index in record_function_indexes.items(): out.emit(f"#define {name}_INDEX {index}\n") out.emit("\n") + out.emit("const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {\n") - for inst_name, record_names in record_table.items(): - indices = ", ".join(f"{name}_INDEX" for name in record_names) - out.emit(f" [{inst_name}] = {{{len(record_names)}, {{{indices}}}}},\n") + for inst_name, records in record_table.items(): + indices = ", ".join(f"{name}_INDEX" for name in records) + out.emit(f" [{inst_name}] = {{{len(records)}, {{{indices}}}}},\n") + out.emit("};\n\n") + + out.emit("const _PyOpcodeRecordSlotMap _PyOpcode_RecordSlotMaps[256] = {\n") + for inst_name, (slots, mask) in record_consumer_table.items(): + slot_list = ", ".join(str(s) for s in slots) + out.emit( + f" [{inst_name}] = {{{len(slots)}, {mask}, {{{slot_list}}}}},\n" + ) out.emit("};\n\n") - out.emit(f"const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[{func_count+1}] = {{\n") + + out.emit( + f"const _Py_RecordFuncPtr _PyOpcode_RecordFunctions" + f"[{len(record_function_indexes) + 1}] = {{\n" + ) out.emit(" [0] = NULL,\n") for name in record_function_indexes: out.emit(f" [{name}_INDEX] = _PyOpcode_RecordFunction{name[7:]},\n") out.emit("};\n") + generate_record_transform_dispatcher(record_uop_names, out) + + +def generate_record_transform_dispatcher( + record_uop_names: list[str], out: CWriter +) -> None: + """Emit a switch that converts a family-recorded value for a recorder uop. + + Only `_RECORD_*` uops that need conversion get a case; the default + returns the input value unchanged. Helpers live in Python/optimizer.c. + """ + cases: dict[str, list[str]] = {} + for record_name in record_uop_names: + helper = _RECORD_TRANSFORM_HELPERS.get(record_name) + if helper is None: + continue + cases.setdefault(helper, []).append(record_name) + out.emit("\n") + out.emit( + "PyObject *\n" + "_PyOpcode_RecordTransformValue(int uop, PyObject *value)\n" + "{\n" + ) + out.emit(" switch (uop) {\n") + for helper, names in cases.items(): + for name in names: + out.emit(f" case {name}:\n") + out.emit(f" return {helper}(value);\n") + out.emit(" default:\n") + out.emit(" return value;\n") + out.emit(" }\n") + out.emit("}\n") arg_parser = argparse.ArgumentParser(