mirror of
https://github.com/MariaDB/server.git
synced 2025-12-28 00:01:00 +00:00
MDEV-38164: Fix the estimates reported by TABLE::key_storage_length()
Some checks failed
Build on Windows ARM64 / build (push) Has been cancelled
Some checks failed
Build on Windows ARM64 / build (push) Has been cancelled
They were based on the maximum possible key tuple length, which can be much larger than the real data size. The return value is used by handler::keyread_time(), which is used to estimate the cost of range access. This could cause range access not to be picked, even if it uses the clustered PK and reads about 8% of the table. The fix is to add KEY::stat_storage_length (next to KEY::rec_per_key) and have the storage engine fill it in handler::info(HA_STATUS_CONST). Currently, only InnoDB fills this based on its internal statistics: index->stat_index_size and ib_table->stat_n_rows. Also changed: - In handler::calculate_costs(), use ha_keyread_clustered_time() when computing clustered PK read cost, not ha_keyread_time(). The fix is OFF by default and enabled by setting FIX_INDEX_LOOKUP_COST flag in @@new_mode.
This commit is contained in:
parent
8e7d42c03b
commit
4cff562f3f
@ -744,7 +744,7 @@ The following specify which files/extra groups are read (specified before remain
|
||||
connection before aborting the write
|
||||
--new-mode=name Used to introduce new behavior to existing MariaDB
|
||||
versions. Any combination of:
|
||||
FIX_INDEX_STATS_FOR_ALL_NULLS
|
||||
FIX_INDEX_STATS_FOR_ALL_NULLS, FIX_INDEX_LOOKUP_COST
|
||||
Use 'ALL' to set all combinations.
|
||||
--note-verbosity=name
|
||||
Verbosity level for note-warnings given to the user. See
|
||||
|
||||
42
mysql-test/main/optimizer_costs_innodb.result
Normal file
42
mysql-test/main/optimizer_costs_innodb.result
Normal file
@ -0,0 +1,42 @@
|
||||
#
|
||||
# MDEV-38164: Poor cost calculations for index access cause bad query plans for big VARCHARs in 11.x
|
||||
#
|
||||
create table t1 (
|
||||
pk1 int not null,
|
||||
pk2 int not null,
|
||||
filler1 varchar(500),
|
||||
filler2 varchar(500),
|
||||
filler3 varchar(500),
|
||||
filler4 varchar(500),
|
||||
filler5 varchar(500),
|
||||
filler6 varchar(500),
|
||||
filler7 varchar(500),
|
||||
filler8 varchar(500),
|
||||
filler9 varchar(500),
|
||||
fillerA varchar(500),
|
||||
fillerB varchar(500),
|
||||
fillerC varchar(500),
|
||||
fillerD varchar(500),
|
||||
fillerE varchar(500),
|
||||
fillerF varchar(500),
|
||||
primary key(pk1, pk2)
|
||||
) collate utf8mb4_general_ci engine=innodb;
|
||||
insert into t1 (pk1, pk2) select
|
||||
mod(seq, 8), seq
|
||||
from
|
||||
seq_1_to_10000;
|
||||
analyze table t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 analyze status Engine-independent statistics collected
|
||||
test.t1 analyze status OK
|
||||
# Before the fix: this will use full table scan:
|
||||
explain select * from t1 where pk1=1;
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t1 ALL PRIMARY NULL NULL # 10000 Using where
|
||||
set @save_new_mode=@@new_mode;
|
||||
set new_mode=concat(@@new_mode,'FIX_INDEX_LOOKUP_COST');
|
||||
explain select * from t1 where pk1=1;
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t1 ref PRIMARY PRIMARY 4 # 1250
|
||||
set new_mode=@save_new_mode;
|
||||
drop table t1;
|
||||
44
mysql-test/main/optimizer_costs_innodb.test
Normal file
44
mysql-test/main/optimizer_costs_innodb.test
Normal file
@ -0,0 +1,44 @@
|
||||
--source include/have_innodb.inc
|
||||
--source include/have_sequence.inc
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-38164: Poor cost calculations for index access cause bad query plans for big VARCHARs in 11.x
|
||||
--echo #
|
||||
create table t1 (
|
||||
pk1 int not null,
|
||||
pk2 int not null,
|
||||
filler1 varchar(500),
|
||||
filler2 varchar(500),
|
||||
filler3 varchar(500),
|
||||
filler4 varchar(500),
|
||||
filler5 varchar(500),
|
||||
filler6 varchar(500),
|
||||
filler7 varchar(500),
|
||||
filler8 varchar(500),
|
||||
filler9 varchar(500),
|
||||
fillerA varchar(500),
|
||||
fillerB varchar(500),
|
||||
fillerC varchar(500),
|
||||
fillerD varchar(500),
|
||||
fillerE varchar(500),
|
||||
fillerF varchar(500),
|
||||
primary key(pk1, pk2)
|
||||
) collate utf8mb4_general_ci engine=innodb;
|
||||
|
||||
insert into t1 (pk1, pk2) select
|
||||
mod(seq, 8), seq
|
||||
from
|
||||
seq_1_to_10000;
|
||||
analyze table t1;
|
||||
|
||||
--echo # Before the fix: this will use full table scan:
|
||||
--replace_column 8 #
|
||||
explain select * from t1 where pk1=1;
|
||||
|
||||
set @save_new_mode=@@new_mode;
|
||||
set new_mode=concat(@@new_mode,'FIX_INDEX_LOOKUP_COST');
|
||||
--replace_column 8 #
|
||||
explain select * from t1 where pk1=1;
|
||||
|
||||
set new_mode=@save_new_mode;
|
||||
drop table t1;
|
||||
@ -25,7 +25,7 @@ ERROR 42000: Variable 'new_mode' can't be set to the value of 'TEST_WARNING3'
|
||||
SET @@session.new_mode = "ALL";
|
||||
select @@session.new_mode;
|
||||
@@session.new_mode
|
||||
FIX_INDEX_STATS_FOR_ALL_NULLS
|
||||
FIX_INDEX_STATS_FOR_ALL_NULLS,FIX_INDEX_LOOKUP_COST
|
||||
SET @@global.new_mode = NULL;
|
||||
ERROR 42000: Variable 'new_mode' can't be set to the value of 'NULL'
|
||||
SET @@global.new_mode = '';
|
||||
|
||||
@ -2319,7 +2319,7 @@ VARIABLE_COMMENT Used to introduce new behavior to existing MariaDB versions
|
||||
NUMERIC_MIN_VALUE NULL
|
||||
NUMERIC_MAX_VALUE NULL
|
||||
NUMERIC_BLOCK_SIZE NULL
|
||||
ENUM_VALUE_LIST FIX_INDEX_STATS_FOR_ALL_NULLS
|
||||
ENUM_VALUE_LIST FIX_INDEX_STATS_FOR_ALL_NULLS,FIX_INDEX_LOOKUP_COST
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME NOTE_VERBOSITY
|
||||
|
||||
@ -2529,7 +2529,7 @@ VARIABLE_COMMENT Used to introduce new behavior to existing MariaDB versions
|
||||
NUMERIC_MIN_VALUE NULL
|
||||
NUMERIC_MAX_VALUE NULL
|
||||
NUMERIC_BLOCK_SIZE NULL
|
||||
ENUM_VALUE_LIST FIX_INDEX_STATS_FOR_ALL_NULLS
|
||||
ENUM_VALUE_LIST FIX_INDEX_STATS_FOR_ALL_NULLS,FIX_INDEX_LOOKUP_COST
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME NOTE_VERBOSITY
|
||||
|
||||
@ -75,9 +75,19 @@ void handler::calculate_costs(Cost_estimate *cost, uint keyno,
|
||||
{
|
||||
/* Clustered index */
|
||||
io_blocks= unassigned_single_point_ranges;
|
||||
cost->index_cost= ha_keyread_time(keyno, n_ranges,
|
||||
total_rows + multi_row_ranges,
|
||||
io_blocks);
|
||||
if (TEST_NEW_MODE_FLAG(table->in_use, NEW_MODE_FIX_INDEX_LOOKUP_COST))
|
||||
{
|
||||
cost->index_cost=
|
||||
ha_keyread_clustered_time(keyno, n_ranges,
|
||||
total_rows + multi_row_ranges,
|
||||
io_blocks);
|
||||
}
|
||||
else
|
||||
{
|
||||
cost->index_cost= ha_keyread_time(keyno, n_ranges,
|
||||
total_rows + multi_row_ranges,
|
||||
io_blocks);
|
||||
}
|
||||
cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST;
|
||||
}
|
||||
/* Adjust io cost to data size */
|
||||
@ -178,11 +188,9 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
|
||||
*/
|
||||
ulonglong unassigned_single_point_ranges= 0;
|
||||
|
||||
uint len= table->key_info[keyno].key_length + table->file->ref_length;
|
||||
if (table->file->is_clustering_key(keyno))
|
||||
len= table->s->stored_rec_length;
|
||||
size_t len= table->key_storage_length(keyno);
|
||||
/* Assume block is 75 % full */
|
||||
uint avg_block_records= ((uint) (stats.block_size*3/4))/len + 1;
|
||||
size_t avg_block_records= ((uint) (stats.block_size*3/4))/len + 1;
|
||||
uint limit= thd->variables.eq_range_index_dive_limit;
|
||||
bool use_statistics_for_eq_range= eq_ranges_exceeds_limit(seq,
|
||||
seq_init_param,
|
||||
|
||||
@ -220,7 +220,8 @@ void old_mode_deprecated_warnings(THD *thd, ulonglong v);
|
||||
*/
|
||||
|
||||
#define NEW_MODE_FIX_INDEX_STATS_FOR_ALL_NULLS (1ULL << 0)
|
||||
#define NEW_MODE_MAX 1
|
||||
#define NEW_MODE_FIX_INDEX_LOOKUP_COST (1ULL << 1)
|
||||
#define NEW_MODE_MAX 2
|
||||
|
||||
/* Definitions above that have transitioned from new behaviour to default */
|
||||
|
||||
|
||||
@ -13486,6 +13486,7 @@ static bool create_hj_key_for_table(JOIN *join, JOIN_TAB *join_tab,
|
||||
keyinfo->flags= HA_GENERATED_KEY;
|
||||
keyinfo->is_statistics_from_stat_tables= FALSE;
|
||||
keyinfo->all_nulls_key_parts= 0;
|
||||
keyinfo->stat_storage_length= 0;
|
||||
keyinfo->name.str= "$hj";
|
||||
keyinfo->name.length= 3;
|
||||
keyinfo->rec_per_key= (ulong*) thd->calloc(sizeof(ulong)*key_parts);
|
||||
@ -22482,6 +22483,7 @@ bool Create_tmp_table::finalize(THD *thd,
|
||||
keyinfo->algorithm= HA_KEY_ALG_UNDEF;
|
||||
keyinfo->is_statistics_from_stat_tables= FALSE;
|
||||
keyinfo->all_nulls_key_parts= 0;
|
||||
keyinfo->stat_storage_length= 0;
|
||||
keyinfo->name= group_key;
|
||||
keyinfo->comment.str= 0;
|
||||
ORDER *cur_group= m_group;
|
||||
@ -22604,6 +22606,7 @@ bool Create_tmp_table::finalize(THD *thd,
|
||||
keyinfo->algorithm= HA_KEY_ALG_UNDEF;
|
||||
keyinfo->is_statistics_from_stat_tables= FALSE;
|
||||
keyinfo->all_nulls_key_parts= 0;
|
||||
keyinfo->stat_storage_length= 0;
|
||||
keyinfo->read_stats= NULL;
|
||||
keyinfo->collected_stats= NULL;
|
||||
|
||||
|
||||
@ -154,6 +154,12 @@ typedef struct st_key {
|
||||
*/
|
||||
ulong *rec_per_key;
|
||||
|
||||
/*
|
||||
Average space index tuple takes on disk, according to the engine's
|
||||
statistics. 0 if statistics is not available.
|
||||
*/
|
||||
size_t stat_storage_length;
|
||||
|
||||
/*
|
||||
This structure is used for statistical data on the index
|
||||
that has been read from the statistical table index_stat
|
||||
|
||||
@ -4056,6 +4056,7 @@ static Sys_var_set Sys_old_behavior(
|
||||
static const char *new_mode_all_names[]=
|
||||
{
|
||||
"FIX_INDEX_STATS_FOR_ALL_NULLS",
|
||||
"FIX_INDEX_LOOKUP_COST",
|
||||
"TEST_WARNING1", // Default from here, See NEW_MODE_MAX
|
||||
"TEST_WARNING2",
|
||||
0
|
||||
@ -4063,8 +4064,8 @@ static const char *new_mode_all_names[]=
|
||||
|
||||
static int new_mode_hidden_names[] =
|
||||
{
|
||||
1, // TEST_WARNING1
|
||||
2, // TEST_WARNING2
|
||||
2, // TEST_WARNING1
|
||||
3, // TEST_WARNING2
|
||||
-1 // End of list
|
||||
};
|
||||
|
||||
|
||||
15
sql/table.cc
15
sql/table.cc
@ -8643,6 +8643,7 @@ bool TABLE::add_tmp_key(uint key, uint key_parts,
|
||||
keyinfo->read_stats= NULL;
|
||||
keyinfo->collected_stats= NULL;
|
||||
keyinfo->all_nulls_key_parts= 0;
|
||||
keyinfo->stat_storage_length= 0;
|
||||
|
||||
for (i= 0; i < key_parts; i++)
|
||||
{
|
||||
@ -10932,3 +10933,17 @@ void TABLE::mark_table_for_reopen()
|
||||
DBUG_ASSERT(thd);
|
||||
thd->locked_tables_list.mark_table_for_reopen(this);
|
||||
}
|
||||
|
||||
/*
|
||||
@brief
|
||||
Estimate how much space is required to store one index tuple
|
||||
*/
|
||||
size_t TABLE::key_storage_length(uint index)
|
||||
{
|
||||
/* Use storage engine's statistics if it is available */
|
||||
if (TEST_NEW_MODE_FLAG(in_use, NEW_MODE_FIX_INDEX_LOOKUP_COST) &&
|
||||
key_info[index].stat_storage_length)
|
||||
return key_info[index].stat_storage_length;
|
||||
|
||||
return key_storage_length_from_ddl(index);
|
||||
}
|
||||
|
||||
15
sql/table.h
15
sql/table.h
@ -1756,7 +1756,20 @@ public:
|
||||
uint actual_n_key_parts(KEY *keyinfo);
|
||||
ulong actual_key_flags(KEY *keyinfo);
|
||||
int update_virtual_field(Field *vf, bool ignore_warnings);
|
||||
inline size_t key_storage_length(uint index)
|
||||
|
||||
size_t key_storage_length(uint index);
|
||||
/*
|
||||
@brief
|
||||
Estimate how index tuple takes in storage, based solely on table's DDL
|
||||
|
||||
@detail
|
||||
This is a conservative number that assumes the value is stored in
|
||||
KeyTupleFormat (or table->record format for clustered PK), without
|
||||
endspace compression, etc.
|
||||
On the other hand, it doesn't account that the storage engine may need
|
||||
to store transactionIds, etc.
|
||||
*/
|
||||
inline size_t key_storage_length_from_ddl(uint index)
|
||||
{
|
||||
if (is_clustering_key(index))
|
||||
return s->stored_rec_length;
|
||||
|
||||
@ -15003,6 +15003,28 @@ stats_fetch:
|
||||
}
|
||||
|
||||
KEY* key = &table->key_info[i];
|
||||
/*
|
||||
Provide statistics about how many bytes an index record takes on disk,
|
||||
on average.
|
||||
*/
|
||||
if (ib_table->stat_n_rows && !(key->flags & (HA_FULLTEXT | HA_SPATIAL))) {
|
||||
/*
|
||||
Start with total space used by the index divided by number of rows
|
||||
*/
|
||||
key->stat_storage_length = (size_t) ((index->stat_index_size * srv_page_size) /
|
||||
ib_table->stat_n_rows);
|
||||
|
||||
/*
|
||||
The above can be too large
|
||||
A) in case of tables with very few rows
|
||||
B) in case of indexes with partially full pages.
|
||||
So, clip the above estimate by a conservative estimate we've used
|
||||
before:
|
||||
*/
|
||||
size_t conservative_estimate = table->key_storage_length_from_ddl(i);
|
||||
if (key->stat_storage_length > conservative_estimate)
|
||||
key->stat_storage_length = conservative_estimate;
|
||||
}
|
||||
|
||||
for (j = 0; j < key->ext_key_parts; j++) {
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user