Skip to content

Commit 5d41632

Browse files
authored
gh-146455: Fix O(N²) in add_const() after constant folding moved to CFG (#146456)
The add_const() function in flowgraph.c uses a linear search over the consts list to find the index of a constant. After gh-126835 moved constant folding from the AST optimizer to the CFG optimizer, this function is now called N times for N inner tuple elements during fold_tuple_of_constants(). Because each call scans the consts list, the total time is O(N²).

Fix by maintaining an auxiliary _Py_hashtable_t that maps object pointers to their indices in the consts list, providing O(1) lookup.

For a file with 100,000 constant 2-tuples:
- Before: 10.38s (add_const occupies 83.76% of CPU time)
- After: 1.48s
1 parent 6d4ca16 commit 5d41632

2 files changed

Lines changed: 89 additions & 43 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix an O(N²) compile-time regression in constant folding, introduced when constant folding was moved from the AST optimizer to the CFG optimizer.

Python/flowgraph.c

Lines changed: 88 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "pycore_intrinsics.h"
77
#include "pycore_pymem.h" // _PyMem_IsPtrFreed()
88
#include "pycore_long.h" // _PY_IS_SMALL_INT()
9+
#include "pycore_hashtable.h" // _Py_hashtable_t
910

1011
#include "pycore_opcode_utils.h"
1112
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
@@ -1333,30 +1334,38 @@ get_const_value(int opcode, int oparg, PyObject *co_consts)
13331334

13341335
// Steals a reference to newconst.
13351336
static int
1336-
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
1337+
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache,
1338+
_Py_hashtable_t *consts_index)
13371339
{
13381340
if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
13391341
Py_DECREF(newconst);
13401342
return -1;
13411343
}
13421344

1343-
Py_ssize_t index;
1344-
for (index = 0; index < PyList_GET_SIZE(consts); index++) {
1345-
if (PyList_GET_ITEM(consts, index) == newconst) {
1346-
break;
1347-
}
1345+
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts_index, (void *)newconst);
1346+
if (entry != NULL) {
1347+
Py_DECREF(newconst);
1348+
return (int)(uintptr_t)entry->value;
13481349
}
1349-
if (index == PyList_GET_SIZE(consts)) {
1350-
if ((size_t)index >= (size_t)INT_MAX - 1) {
1351-
PyErr_SetString(PyExc_OverflowError, "too many constants");
1352-
Py_DECREF(newconst);
1353-
return -1;
1354-
}
1355-
if (PyList_Append(consts, newconst)) {
1356-
Py_DECREF(newconst);
1357-
return -1;
1358-
}
1350+
1351+
Py_ssize_t index = PyList_GET_SIZE(consts);
1352+
if ((size_t)index >= (size_t)INT_MAX - 1) {
1353+
PyErr_SetString(PyExc_OverflowError, "too many constants");
1354+
Py_DECREF(newconst);
1355+
return -1;
1356+
}
1357+
if (PyList_Append(consts, newconst)) {
1358+
Py_DECREF(newconst);
1359+
return -1;
1360+
}
1361+
1362+
if (_Py_hashtable_set(consts_index, (void *)newconst, (void *)(uintptr_t)index) < 0) {
1363+
PyList_SetSlice(consts, index, index + 1, NULL);
1364+
Py_DECREF(newconst);
1365+
PyErr_NoMemory();
1366+
return -1;
13591367
}
1368+
13601369
Py_DECREF(newconst);
13611370
return (int)index;
13621371
}
@@ -1432,7 +1441,8 @@ maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
14321441
/* Steals reference to "newconst" */
14331442
static int
14341443
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
1435-
PyObject *consts, PyObject *const_cache)
1444+
PyObject *consts, PyObject *const_cache,
1445+
_Py_hashtable_t *consts_index)
14361446
{
14371447
int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
14381448
if (res < 0) {
@@ -1442,7 +1452,7 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,
14421452
if (res > 0) {
14431453
return SUCCESS;
14441454
}
1445-
int oparg = add_const(newconst, consts, const_cache);
1455+
int oparg = add_const(newconst, consts, const_cache, consts_index);
14461456
RETURN_IF_ERROR(oparg);
14471457
INSTR_SET_OP1(instr, LOAD_CONST, oparg);
14481458
return SUCCESS;
@@ -1455,7 +1465,8 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,
14551465
Called with codestr pointing to the first LOAD_CONST.
14561466
*/
14571467
static int
1458-
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1468+
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts,
1469+
PyObject *const_cache, _Py_hashtable_t *consts_index)
14591470
{
14601471
/* Pre-conditions */
14611472
assert(PyDict_CheckExact(const_cache));
@@ -1492,7 +1503,7 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const
14921503
}
14931504

14941505
nop_out(const_instrs, seq_size);
1495-
return instr_make_load_const(instr, const_tuple, consts, const_cache);
1506+
return instr_make_load_const(instr, const_tuple, consts, const_cache, consts_index);
14961507
}
14971508

14981509
/* Replace:
@@ -1510,7 +1521,8 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const
15101521
*/
15111522
static int
15121523
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
1513-
PyObject *consts, PyObject *const_cache)
1524+
PyObject *consts, PyObject *const_cache,
1525+
_Py_hashtable_t *consts_index)
15141526
{
15151527
assert(PyDict_CheckExact(const_cache));
15161528
assert(PyList_CheckExact(consts));
@@ -1562,7 +1574,7 @@ fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
15621574
nop_out(&instr, 1);
15631575
}
15641576
assert(consts_found == 0);
1565-
return instr_make_load_const(intrinsic, newconst, consts, const_cache);
1577+
return instr_make_load_const(intrinsic, newconst, consts, const_cache, consts_index);
15661578
}
15671579

15681580
if (expect_append) {
@@ -1598,7 +1610,8 @@ Optimize lists and sets for:
15981610
*/
15991611
static int
16001612
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
1601-
PyObject *consts, PyObject *const_cache)
1613+
PyObject *consts, PyObject *const_cache,
1614+
_Py_hashtable_t *consts_index)
16021615
{
16031616
assert(PyDict_CheckExact(const_cache));
16041617
assert(PyList_CheckExact(consts));
@@ -1648,7 +1661,7 @@ optimize_lists_and_sets(basicblock *bb, int i, int nextop,
16481661
Py_SETREF(const_result, frozenset);
16491662
}
16501663

1651-
int index = add_const(const_result, consts, const_cache);
1664+
int index = add_const(const_result, consts, const_cache, consts_index);
16521665
RETURN_IF_ERROR(index);
16531666
nop_out(const_instrs, seq_size);
16541667

@@ -1845,7 +1858,8 @@ eval_const_binop(PyObject *left, int op, PyObject *right)
18451858
}
18461859

18471860
static int
1848-
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1861+
fold_const_binop(basicblock *bb, int i, PyObject *consts,
1862+
PyObject *const_cache, _Py_hashtable_t *consts_index)
18491863
{
18501864
#define BINOP_OPERAND_COUNT 2
18511865
assert(PyDict_CheckExact(const_cache));
@@ -1887,7 +1901,7 @@ fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
18871901
}
18881902

18891903
nop_out(operands_instrs, BINOP_OPERAND_COUNT);
1890-
return instr_make_load_const(binop, newconst, consts, const_cache);
1904+
return instr_make_load_const(binop, newconst, consts, const_cache, consts_index);
18911905
}
18921906

18931907
static PyObject *
@@ -1933,7 +1947,8 @@ eval_const_unaryop(PyObject *operand, int opcode, int oparg)
19331947
}
19341948

19351949
static int
1936-
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
1950+
fold_const_unaryop(basicblock *bb, int i, PyObject *consts,
1951+
PyObject *const_cache, _Py_hashtable_t *consts_index)
19371952
{
19381953
#define UNARYOP_OPERAND_COUNT 1
19391954
assert(PyDict_CheckExact(const_cache));
@@ -1970,7 +1985,7 @@ fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cach
19701985
assert(PyBool_Check(newconst));
19711986
}
19721987
nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
1973-
return instr_make_load_const(unaryop, newconst, consts, const_cache);
1988+
return instr_make_load_const(unaryop, newconst, consts, const_cache, consts_index);
19741989
}
19751990

19761991
#define VISITED (-1)
@@ -2165,7 +2180,8 @@ apply_static_swaps(basicblock *block, int i)
21652180
}
21662181

21672182
static int
2168-
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
2183+
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb,
2184+
PyObject *consts, _Py_hashtable_t *consts_index)
21692185
{
21702186
assert(PyDict_CheckExact(const_cache));
21712187
assert(PyList_CheckExact(consts));
@@ -2283,7 +2299,7 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
22832299
return ERROR;
22842300
}
22852301
cnt = PyBool_FromLong(is_true);
2286-
int index = add_const(cnt, consts, const_cache);
2302+
int index = add_const(cnt, consts, const_cache, consts_index);
22872303
if (index < 0) {
22882304
return ERROR;
22892305
}
@@ -2297,15 +2313,17 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
22972313
}
22982314

22992315
static int
2300-
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
2316+
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts,
2317+
_Py_hashtable_t *consts_index) {
23012318
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2302-
RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
2319+
RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts, consts_index));
23032320
}
23042321
return SUCCESS;
23052322
}
23062323

23072324
static int
2308-
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
2325+
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts,
2326+
_Py_hashtable_t *consts_index)
23092327
{
23102328
assert(PyDict_CheckExact(const_cache));
23112329
assert(PyList_CheckExact(consts));
@@ -2345,11 +2363,11 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
23452363
continue;
23462364
}
23472365
}
2348-
RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
2366+
RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache, consts_index));
23492367
break;
23502368
case BUILD_LIST:
23512369
case BUILD_SET:
2352-
RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
2370+
RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache, consts_index));
23532371
break;
23542372
case POP_JUMP_IF_NOT_NONE:
23552373
case POP_JUMP_IF_NONE:
@@ -2484,23 +2502,23 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
24842502
_Py_FALLTHROUGH;
24852503
case UNARY_INVERT:
24862504
case UNARY_NEGATIVE:
2487-
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2505+
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
24882506
break;
24892507
case CALL_INTRINSIC_1:
24902508
if (oparg == INTRINSIC_LIST_TO_TUPLE) {
24912509
if (nextop == GET_ITER) {
24922510
INSTR_SET_OP0(inst, NOP);
24932511
}
24942512
else {
2495-
RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
2513+
RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache, consts_index));
24962514
}
24972515
}
24982516
else if (oparg == INTRINSIC_UNARY_POSITIVE) {
2499-
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
2517+
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
25002518
}
25012519
break;
25022520
case BINARY_OP:
2503-
RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
2521+
RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache, consts_index));
25042522
break;
25052523
}
25062524
}
@@ -2545,16 +2563,17 @@ remove_redundant_nops_and_jumps(cfg_builder *g)
25452563
NOPs. Later those NOPs are removed.
25462564
*/
25472565
static int
2548-
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
2566+
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache,
2567+
_Py_hashtable_t *consts_index, int firstlineno)
25492568
{
25502569
assert(PyDict_CheckExact(const_cache));
25512570
RETURN_IF_ERROR(check_cfg(g));
25522571
RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
25532572
RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
25542573
RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
2555-
RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
2574+
RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts, consts_index));
25562575
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
2557-
RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
2576+
RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts, consts_index));
25582577
}
25592578
RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
25602579
RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
@@ -3674,7 +3693,33 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
36743693
RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
36753694

36763695
/** Optimization **/
3677-
RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
3696+
3697+
_Py_hashtable_t *consts_index = _Py_hashtable_new(
3698+
_Py_hashtable_hash_ptr, _Py_hashtable_compare_direct);
3699+
if (consts_index == NULL) {
3700+
PyErr_NoMemory();
3701+
return ERROR;
3702+
}
3703+
3704+
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(consts); i++) {
3705+
PyObject *item = PyList_GET_ITEM(consts, i);
3706+
if (_Py_hashtable_get_entry(consts_index, (void *)item) != NULL) {
3707+
continue;
3708+
}
3709+
if (_Py_hashtable_set(consts_index, (void *)item,
3710+
(void *)(uintptr_t)i) < 0) {
3711+
_Py_hashtable_destroy(consts_index);
3712+
PyErr_NoMemory();
3713+
return ERROR;
3714+
}
3715+
}
3716+
3717+
int ret = optimize_cfg(g, consts, const_cache, consts_index, firstlineno);
3718+
3719+
_Py_hashtable_destroy(consts_index);
3720+
3721+
RETURN_IF_ERROR(ret);
3722+
36783723
RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
36793724
RETURN_IF_ERROR(
36803725
add_checks_for_loads_of_uninitialized_variables(

0 commit comments

Comments
 (0)