StoneDB/sql/sql_executor.h

689 lines
22 KiB
C++

#ifndef SQL_EXECUTOR_INCLUDED
#define SQL_EXECUTOR_INCLUDED
/* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights
* reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.
This program is also distributed with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have included with MySQL.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License, version 2.0, for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
/** @file Classes for query execution */
#include "records.h" // READ_RECORD
#include "sql_opt_exec_shared.h" // QEP_shared_owner
class JOIN;
class JOIN_TAB;
class QEP_TAB;
typedef struct st_table_ref TABLE_REF;
typedef struct st_position POSITION;
/**
  Outcome reported by one step of a "nested loop" operation (the
  Next_select_func family of functions).

  NESTED_LOOP_OK is the only value that lets the nested loop continue;
  every other value aborts it.
*/
enum enum_nested_loop_state
{
  /**
    Thread shutdown was requested while processing the record.
    @todo could it be merged with NESTED_LOOP_ERROR? Why two distinct states?
  */
  NESTED_LOOP_KILLED= -2,

  /** A fatal error (like table corruption) was detected. */
  NESTED_LOOP_ERROR= -1,

  /** Record has been successfully handled. */
  NESTED_LOOP_OK= 0,

  /**
    Record has been successfully handled; in addition, the nested loop has
    produced the number of rows specified in the LIMIT clause of the query.
  */
  NESTED_LOOP_QUERY_LIMIT= 3,

  /**
    Record has been successfully handled; in addition, there is a cursor and
    the nested loop algorithm has produced the number of rows requested by
    the current cursor fetch operation.
  */
  NESTED_LOOP_CURSOR_LIMIT= 4
};
/**
  Pointer to a nested-loop step function: it receives the JOIN, a QEP_TAB
  and a bool (named end_of_records in the functions declared with this
  signature in this header) and reports an enum_nested_loop_state.
*/
typedef enum_nested_loop_state (*Next_select_func)(class JOIN *join,
                                                   class QEP_TAB *qep_tab,
                                                   bool end_of_records);
/*
Temporary table used by semi-join DuplicateElimination strategy
This consists of the temptable itself and data needed to put records
into it. The table's DDL is as follows:
CREATE TABLE tmptable (col VARCHAR(n) BINARY, PRIMARY KEY(col));
where the primary key can be replaced with unique constraint if n exceeds
the limit (as it is always done for query execution-time temptables).
The record value is a concatenation of rowids of tables from the join we're
executing. If a join table is on the inner side of the outer join, we
assume that its rowid can be NULL and provide means to store this rowid in
the tuple.
*/
class SJ_TMP_TABLE : public Sql_alloc
{
public:
SJ_TMP_TABLE():hash_field(NULL)
{}
/*
Array of pointers to tables whose rowids compose the temporary table
record.
*/
class TAB
{
public:
QEP_TAB *qep_tab;
uint rowid_offset;
ushort null_byte;
uchar null_bit;
};
TAB *tabs;
TAB *tabs_end;
/*
is_confluent==TRUE means this is a special case where the temptable record
has zero length (and presence of a unique key means that the temptable can
have either 0 or 1 records).
In this case we don't create the physical temptable but instead record
its state in SJ_TMP_TABLE::have_confluent_record.
*/
bool is_confluent;
/*
When is_confluent==TRUE: the contents of the table (whether it has the
record or not).
*/
bool have_confluent_row;
/* table record parameters */
uint null_bits;
uint null_bytes;
uint rowid_len;
/* The temporary table itself (NULL means not created yet) */
TABLE *tmp_table;
/*
These are the members we got from temptable creation code. We'll need
them if we'll need to convert table from HEAP to MyISAM/Maria.
*/
MI_COLUMNDEF *start_recinfo;
MI_COLUMNDEF *recinfo;
/* Pointer to next table (next->start_idx > this->end_idx) */
SJ_TMP_TABLE *next;
/* Calc hash instead of too long key */
Field_longlong *hash_field;
};
/**
  Executor structure for the materialized semi-join info. It holds
    - the description of the expressions selected from the subquery
    - the sj-materialization temporary table
*/
class Semijoin_mat_exec : public Sql_alloc
{
public:
  /**
    Stores the plan-time properties of the materialization; the temporary
    table pointer starts out as NULL and table_param is value-initialized.
  */
  Semijoin_mat_exec(TABLE_LIST *sj_nest_arg, bool is_scan_arg,
                    uint table_count_arg, uint mat_table_index_arg,
                    uint inner_table_index_arg)
    :sj_nest(sj_nest_arg),
     is_scan(is_scan_arg),
     table_count(table_count_arg),
     mat_table_index(mat_table_index_arg),
     inner_table_index(inner_table_index_arg),
     table_param(),
     table(NULL)
  {}

  ~Semijoin_mat_exec()
  {}

  /// Semi-join nest for this materialization
  TABLE_LIST *const sj_nest;
  /// TRUE if executing a scan, FALSE if lookup
  const bool is_scan;
  /// Number of tables in the sj-nest
  const uint table_count;
  /// Index in join_tab for materialized table
  const uint mat_table_index;
  /// Index in join_tab for first inner table
  const uint inner_table_index;
  /// The temptable and its related info
  Temp_table_param table_param;
  /// Reference to temporary table
  TABLE *table;
};
/**
  QEP_operation is the interface class for operations in a query execution
  plan. The operations declared through enum_op_type are:
    JOIN_CACHE    - caches partial join result and joins with attached table
    QEP_tmp_table - materializes join result in attached table

  The life cycle of an operation is:
    1) it is initialized by the init() call;
    2) it accumulates records one by one as put_record() is called;
    3) it finalizes record sending when end_send() is called;
    4) it frees all internal buffers on the mem_free() call.

  Each operation is attached to a join_tab; which one depends on the
  operation type: JOIN_CACHE is attached to the table following the table
  being cached, QEP_tmp_table is attached to a tmp table.
*/
class QEP_operation : public Sql_alloc
{
public:
  /// Type of the operation
  enum enum_op_type { OT_CACHE, OT_TMP_TABLE };

  /**
    For JOIN_CACHE    : table to be joined with the partial join records
                        from the cache
    For QEP_tmp_table : join_tab of the tmp table
  */
  QEP_TAB *qep_tab;

  QEP_operation()
    :qep_tab(NULL)
  {}

  QEP_operation(QEP_TAB *qep_tab_arg)
    :qep_tab(qep_tab_arg)
  {}

  virtual ~QEP_operation()
  {}

  /// @returns the type of the concrete operation
  virtual enum_op_type type()= 0;

  /**
    Initialize the operation's internal state. Called once per query
    execution. The default implementation does nothing and returns 0.
  */
  virtual int init()
  {
    return 0;
  }

  /**
    Put a new record into the operation's buffer.

    @return one of the enum_nested_loop_state values
  */
  virtual enum_nested_loop_state put_record()= 0;

  /**
    Finalize record sending.

    @return one of the enum_nested_loop_state values
  */
  virtual enum_nested_loop_state end_send()= 0;

  /**
    Internal state cleanup. The default implementation does nothing.
  */
  virtual void mem_free()
  {}
};
/**
  @brief
    Class for accumulating the join result in a tmp table, grouping the
    records if necessary, and sending them further.

  @details
    Join result records are accumulated on the put_record() call.
    How they are accumulated is determined by write_func, which can be:

      end_write          Simply store all records in the tmp table.
      end_write_group    Perform grouping using join->group_fields;
                         records are expected to be sorted.
      end_update         Perform grouping using the key generated on the tmp
                         table. Input records aren't expected to be sorted.
                         The tmp table uses the heap engine.
      end_update_unique  Same as above, but the engine is myisam.

    Lazy table initialization is used - the table will be instantiated and
    the rnd/index scan started on the first put_record() call.
*/
class QEP_tmp_table : public QEP_operation
{
public:
  /// Attaches the operation to qep_tab_arg; no write function is set yet.
  QEP_tmp_table(QEP_TAB *qep_tab_arg)
    :QEP_operation(qep_tab_arg),
     write_func(NULL)
  {}

  virtual enum_op_type type()
  {
    return OT_TMP_TABLE;
  }

  /// Forwards to the private overload with end_of_records == false.
  virtual enum_nested_loop_state put_record()
  {
    return put_record(false);
  }

  /**
    Send the result of the operation further (to the next operation/client).
    This function is called after all records were put into the buffer
    (as determined by the caller).

    @return one of the enum_nested_loop_state values
  */
  virtual enum_nested_loop_state end_send();

  /** write_func setter */
  void set_write_func(Next_select_func new_write_func)
  {
    write_func= new_write_func;
  }

private:
  /** Write function that would be used for saving records in tmp table. */
  Next_select_func write_func;

  enum_nested_loop_state put_record(bool end_of_records);

  /// The result must not be ignored (warn_unused_result).
  MY_ATTRIBUTE((warn_unused_result))
  bool prepare_tmp_table();
};
/*
  NOTE(review): presumably chooses the write function for the tmp-table
  operation attached to tab - confirm against the definition.
*/
void setup_tmptable_write_func(QEP_TAB *tab);

/*
  Nested-loop step functions. The first four have the Next_select_func
  signature (JOIN *, QEP_TAB *, bool) and can therefore be stored in such a
  pointer; evaluate_join_record() takes an int error code instead of the
  end_of_records flag.
*/
enum_nested_loop_state sub_select_op(JOIN *join, QEP_TAB *qep_tab,
                                     bool end_of_records);
enum_nested_loop_state end_send_group(JOIN *join, QEP_TAB *qep_tab,
                                      bool end_of_records);
enum_nested_loop_state end_write_group(JOIN *join, QEP_TAB *qep_tab,
                                       bool end_of_records);
enum_nested_loop_state sub_select(JOIN *join, QEP_TAB *qep_tab,
                                  bool end_of_records);
enum_nested_loop_state evaluate_join_record(JOIN *join, QEP_TAB *qep_tab,
                                            int error);
MY_ATTRIBUTE((warn_unused_result))
bool copy_fields(Temp_table_param *param, const THD *thd);
bool copy_funcs(Func_ptr_array*, const THD *thd);
bool cp_buffer_from_ref(THD *thd, TABLE *table, TABLE_REF *ref);
/** Help function when we get some an error from the table handler. */
int report_handler_error(TABLE *table, int error);
int safe_index_read(QEP_TAB *tab);
st_sort_field * make_unireg_sortorder(ORDER *order, uint *length,
st_sort_field *sortorder);
int join_read_const_table(JOIN_TAB *tab, POSITION *pos);
void join_read_key_unlock_row(st_join_table *tab);
void join_const_unlock_row(st_join_table *tab);
int join_init_quick_read_record(QEP_TAB *tab);
int join_init_read_record(QEP_TAB *tab);
int join_read_first(QEP_TAB *tab);
int join_read_last(QEP_TAB *tab);
int join_read_last_key(QEP_TAB *tab);
int join_materialize_derived(QEP_TAB *tab);
int join_materialize_semijoin(QEP_TAB *tab);
int join_read_prev_same(READ_RECORD *info);
int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl);
int test_if_item_cache_changed(List<Cached_item> &list);
// Create list for using with tempory table
bool change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
List<Item> &new_list1,
List<Item> &new_list2,
uint elements, List<Item> &items);
// Create list for using with tempory table
bool change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
List<Item> &new_list1,
List<Item> &new_list2,
uint elements, List<Item> &items);
bool alloc_group_fields(JOIN *join, ORDER *group);
bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct);
bool setup_sum_funcs(THD *thd, Item_sum **func_ptr);
bool make_group_fields(JOIN *main_join, JOIN *curr_join);
bool setup_copy_fields(THD *thd, Temp_table_param *param,
Ref_ptr_array ref_pointer_array,
List<Item> &res_selected_fields, List<Item> &res_all_fields,
uint elements, List<Item> &all_fields);
bool check_unique_constraint(TABLE *table);
ulonglong unique_hash(Field *field, ulonglong *hash);
class Opt_trace_object;
class QEP_TAB : public Sql_alloc, public QEP_shared_owner
{
public:
QEP_TAB() :
QEP_shared_owner(),
table_ref(NULL),
flush_weedout_table(NULL),
check_weed_out_table(NULL),
firstmatch_return(NO_PLAN_IDX),
loosescan_key_len(0),
loosescan_buf(NULL),
match_tab(NO_PLAN_IDX),
found_match(false),
found(false),
not_null_compl(false),
first_unmatched(NO_PLAN_IDX),
materialized(false),
materialize_table(NULL),
read_first_record(NULL),
next_select(NULL),
read_record(),
save_read_first_record(NULL),
save_read_record(NULL),
used_null_fields(false),
used_uneven_bit_fields(false),
keep_current_rowid(false),
copy_current_rowid(NULL),
distinct(false),
not_used_in_distinct(false),
cache_idx_cond(NULL),
having(NULL),
op(NULL),
tmp_table_param(NULL),
filesort(NULL),
fields(NULL),
all_fields(NULL),
ref_array(NULL),
send_records(0),
quick_traced_before(false),
m_condition_optim(NULL),
m_quick_optim(NULL),
m_keyread_optim(false)
{
/**
@todo Add constructor to READ_RECORD.
All users do init_read_record(), which does memset(),
rather than invoking a constructor.
*/
}
/// Initializes the object from a JOIN_TAB
void init(JOIN_TAB *jt);
// Cleans up.
void cleanup();
// Getters and setters
Item *condition_optim() const { return m_condition_optim; }
QUICK_SELECT_I *quick_optim() const { return m_quick_optim; }
void set_quick_optim() { m_quick_optim= quick(); }
void set_condition_optim() { m_condition_optim= condition(); }
bool keyread_optim() const { return m_keyread_optim; }
void set_keyread_optim()
{
if (table())
m_keyread_optim= table()->key_read;
}
void set_table(TABLE *t)
{
m_qs->set_table(t);
if (t)
t->reginfo.qep_tab= this;
}
/// @returns semijoin strategy for this table.
uint get_sj_strategy() const;
/// Return true if join_tab should perform a FirstMatch action
bool do_firstmatch() const { return firstmatch_return != NO_PLAN_IDX; }
/// Return true if join_tab should perform a LooseScan action
bool do_loosescan() const { return loosescan_key_len; }
/// Return true if join_tab starts a Duplicate Weedout action
bool starts_weedout() const { return flush_weedout_table; }
/// Return true if join_tab finishes a Duplicate Weedout action
bool finishes_weedout() const { return check_weed_out_table; }
bool prepare_scan();
/**
A helper function that allocates appropriate join cache object and
sets next_select function of previous tab.
*/
void init_join_cache(JOIN_TAB *join_tab);
/**
@returns query block id for an inner table of materialized semi-join, and
0 for all other tables.
@note implementation is not efficient (loops over all tables) - use this
function only in EXPLAIN.
*/
uint sjm_query_block_id() const;
/// @returns whether this is doing QS_DYNAMIC_RANGE
bool dynamic_range() const
{
if (!position())
return false; // tmp table
return read_first_record == join_init_quick_read_record;
}
bool use_order() const; ///< Use ordering provided by chosen index?
bool sort_table();
bool remove_duplicates();
inline bool skip_record(THD *thd, bool *skip_record_arg)
{
*skip_record_arg= condition() ? condition()->val_int() == FALSE : FALSE;
return thd->is_error();
}
/**
Used to begin a new execution of a subquery. Necessary if this subquery
has done a filesort which which has cleared condition/quick.
*/
void restore_quick_optim_and_condition()
{
if (m_condition_optim)
set_condition(m_condition_optim);
if (m_quick_optim)
set_quick(m_quick_optim);
}
void pick_table_access_method(const JOIN_TAB *join_tab);
void set_pushed_table_access_method(void);
void push_index_cond(const JOIN_TAB *join_tab,
uint keyno, Opt_trace_object *trace_obj);
/// @return the index used for a table in a QEP
uint effective_index() const;
bool pfs_batch_update(JOIN *join);
public:
/// Pointer to table reference
TABLE_LIST *table_ref;
/* Variables for semi-join duplicate elimination */
SJ_TMP_TABLE *flush_weedout_table;
SJ_TMP_TABLE *check_weed_out_table;
/*
If set, means we should stop join enumeration after we've got the first
match and return to the specified join tab. May be PRE_FIRST_PLAN_IDX
which means stopping join execution after the first match.
*/
plan_idx firstmatch_return;
/*
Length of key tuple (depends on #keyparts used) to store in loosescan_buf.
If zero, means that loosescan is not used.
*/
uint loosescan_key_len;
/* Buffer to save index tuple to be able to skip duplicates */
uchar *loosescan_buf;
/*
If doing a LooseScan, this QEP is the first (i.e. "driving")
QEP_TAB, and match_tab points to the last QEP_TAB handled by the strategy.
match_tab->found_match should be checked to see if the current value group
had a match.
If doing a FirstMatch, check this QEP_TAB to see if there is a match.
Unless the FirstMatch performs a "split jump", this is equal to the
current QEP_TAB.
*/
plan_idx match_tab;
/*
Used by FirstMatch and LooseScan. TRUE <=> there is a matching
record combination
*/
bool found_match;
/**
Used to decide whether an inner table of an outer join should produce NULL
values. If it is true after a call to evaluate_join_record(), the join
condition has been satisfied for at least one row from the inner
table. This member is not really manipulated by this class, see sub_select
for details on its use.
*/
bool found;
/**
This member is true as long as we are evaluating rows from the inner
tables of an outer join. If none of these rows satisfy the join condition,
we generated NULL-complemented rows and set this member to false. In the
meantime, the value may be read by triggered conditions, see
Item_func_trig_cond::val_int().
*/
bool not_null_compl;
plan_idx first_unmatched; /**< used for optimization purposes only */
/// For a materializable derived or SJ table: true if has been materialized
bool materialized;
READ_RECORD::Setup_func materialize_table;
/**
Initialize table for reading and fetch the first row from the table. If
table is a materialized derived one, function must materialize it with
prepare_scan().
*/
READ_RECORD::Setup_func read_first_record;
Next_select_func next_select;
READ_RECORD read_record;
/*
The following two fields are used for a [NOT] IN subquery if it is
executed by an alternative full table scan when the left operand of
the subquery predicate is evaluated to NULL.
*/
READ_RECORD::Setup_func save_read_first_record;/* to save read_first_record */
READ_RECORD::Read_func save_read_record;/* to save read_record.read_record */
// join-cache-related members
bool used_null_fields;
bool used_uneven_bit_fields;
/*
Used by DuplicateElimination. tab->table->ref must have the rowid
whenever we have a current record. copy_current_rowid needed because
we cannot bind to the rowid buffer before the table has been opened.
*/
bool keep_current_rowid;
st_cache_field *copy_current_rowid;
/** TRUE <=> remove duplicates on this table. */
bool distinct;
bool not_used_in_distinct;
/// Index condition for BKA access join
Item *cache_idx_cond;
/** HAVING condition for checking prior saving a record into tmp table*/
Item *having;
QEP_operation *op;
/* Tmp table info */
Temp_table_param *tmp_table_param;
/* Sorting related info */
Filesort *filesort;
/**
List of topmost expressions in the select list. The *next* JOIN TAB
in the plan should use it to obtain correct values. Same applicable to
all_fields. These lists are needed because after tmp tables functions
will be turned to fields. These variables are pointing to
tmp_fields_list[123]. Valid only for tmp tables and the last non-tmp
table in the query plan.
@see JOIN::make_tmp_tables_info()
*/
List<Item> *fields;
/** List of all expressions in the select list */
List<Item> *all_fields;
/*
Pointer to the ref array slice which to switch to before sending
records. Valid only for tmp tables.
*/
Ref_ptr_array *ref_array;
/** Number of records saved in tmp table */
ha_rows send_records;
/**
Used for QS_DYNAMIC_RANGE, i.e., "Range checked for each record".
Used by optimizer tracing to decide whether or not dynamic range
analysis of this select has been traced already. If optimizer
trace option DYNAMIC_RANGE is enabled, range analysis will be
traced with different ranges for every record to the left of this
table in the join. If disabled, range analysis will only be traced
for the first range.
*/
bool quick_traced_before;
/// @See m_quick_optim
Item *m_condition_optim;
/**
m_quick is the quick "to be used at this stage of execution".
It can happen that filesort uses the quick (produced by the optimizer) to
produce a sorted result, then the read of this result has to be done
without "quick", so we must reset m_quick to NULL, but we want to delay
freeing of m_quick or it would close the filesort's result and the table
prematurely.
In that case, we move m_quick to m_quick_optim (=> delay deletion), reset
m_quick to NULL (read of filesort's result will be without quick); if
this is a subquery which is later executed a second time,
QEP_TAB::reset() will restore the quick from m_quick_optim into m_quick.
quick_optim stands for "the quick decided by the optimizer".
EXPLAIN reads this member and m_condition_optim; so if you change them
after exposing the plan (setting plan_state), do it with the
LOCK_query_plan mutex.
*/
QUICK_SELECT_I *m_quick_optim;
/**
True if only index is going to be read for this table. This is the
optimizer's decision.
*/
bool m_keyread_optim;
QEP_TAB(const QEP_TAB&); // not defined
QEP_TAB& operator=(const QEP_TAB&); // not defined
};
/**
  Expands to the element of qtab's join()->qep_tab array whose index is
  qtab->member. For example, QEP_AT(x,first_inner) is the first_inner table
  of x.

  @param qtab    expression designating a QEP_TAB through a pointer; it is
                 parenthesized in the expansion, so compound expressions
                 (p + 1, c ? a : b) are safe, but it is evaluated twice and
                 therefore must not have side effects
  @param member  name of the index-valued member of *qtab to look up
*/
#define QEP_AT(qtab,member) ((qtab)->join()->qep_tab[(qtab)->member])
/**
  Use this class when you need a QEP_TAB which is not connected to any
  JOIN_TAB. It owns both the QEP_TAB and the QEP_shared object the QEP_TAB
  is wired to.
*/
class QEP_TAB_standalone : public Sql_alloc
{
public:
  /// Wires the embedded QEP_TAB to the embedded QEP_shared.
  QEP_TAB_standalone()
  {
    m_qt.set_qs(&m_qs);
  }

  /// Calls cleanup() on the embedded QEP_TAB.
  ~QEP_TAB_standalone()
  {
    m_qt.cleanup();
  }

  /// @returns access to the QEP_TAB
  QEP_TAB &as_QEP_TAB()
  {
    return m_qt;
  }

private:
  QEP_shared m_qs;   // declared first: constructed before m_qt, destroyed after
  QEP_TAB m_qt;
};
#endif /* SQL_EXECUTOR_INCLUDED */