mariadb/storage/myisam/ft_parser.c
Michael Widenius a260b15554 MDEV-4011 Added per thread memory counting and usage
Base code and idea from a patch from by plinux at Taobao.

The idea is that we mark all memory that are thread specific with MY_THREAD_SPECIFIC.
Memory counting is done per thread in the my_malloc_size_cb_func callback function from my_malloc().
There are plenty of new asserts to ensure that for a debug server the counting is correct.

Information_schema.processlist gets two new columns: MEMORY_USED and EXAMINED_ROWS.
- The later is there mainly to show how query is progressing.

The following changes in interfaces was needed to get this to work:
- init_alloc_root() amd init_sql_alloc() has extra option so that one can mark memory with MY_THREAD_SPECIFIC
- One now have to use alloc_root_set_min_malloc() to set min memory to be allocated by alloc_root()
- my_init_dynamic_array()  has extra option so that one can mark memory with MY_THREAD_SPECIFIC
- my_net_init() has extra option so that one can mark memory with MY_THREAD_SPECIFIC
- Added flag for hash_init() so that one can mark hash table to be thread specific.
- Added flags to init_tree() so that one can mark tree to be thread specific.
- Removed with_delete option to init_tree(). Now one should instead use MY_TREE_WITH_DELETE_FLAG.
- Added flag to Warning_info::Warning_info() if the structure should be fully initialized.
- String elements can now be marked as thread specific.
- Internal HEAP tables are now marking it's memory as MY_THREAD_SPECIFIC.
- Changed type of myf from int to ulong, as this is always a set of bit flags.

Other things:
- Removed calls to net_end() and thd->cleanup() as these are now done in ~THD()
- We now also show EXAMINED_ROWS in SHOW PROCESSLIST
- Added new variable 'memory_used'
- Fixed bug where kill_threads_for_user() was using the wrong mem_root to allocate memory.
- Removed calls to the obsoleted function init_dynamic_array()
- Use set_current_thd() instead of my_pthread_setspecific_ptr(THR_THD,...)


client/completion_hash.cc:
  Updated call to init_alloc_root()
client/mysql.cc:
  Updated call to init_alloc_root()
client/mysqlbinlog.cc:
  init_dynamic_array() -> my_init_dynamic_array()
  Updated call to init_alloc_root()
client/mysqlcheck.c:
  Updated call to my_init_dynamic_array()
client/mysqldump.c:
  Updated call to init_alloc_root()
client/mysqltest.cc:
  Updated call to init_alloc_root()
  Updated call to my_init_dynamic_array()
  Fixed compiler warnings
extra/comp_err.c:
  Updated call to my_init_dynamic_array()
extra/resolve_stack_dump.c:
  Updated call to my_init_dynamic_array()
include/hash.h:
  Added HASH_THREAD_SPECIFIC
include/heap.h:
  Added flag is internal temporary table.
include/my_dir.h:
  Safety fix: Ensure that MY_DONT_SORT and MY_WANT_STAT don't interfer with other mysys flags
include/my_global.h:
  Changed type of myf from int to ulong, as this is always a set of bit flags.
include/my_sys.h:
  Added MY_THREAD_SPECIFIC and MY_THREAD_MOVE
  Added malloc_flags to DYNAMIC_ARRAY
  Added extra mysys flag argument to my_init_dynamic_array()
  Removed deprecated functions init_dynamic_array() and my_init_dynamic_array.._ci
  Updated paramaters for init_alloc_root()
include/my_tree.h:
  Added my_flags to allow one to use MY_THREAD_SPECIFIC with hash tables.
  Removed with_delete. One should now instead use MY_TREE_WITH_DELETE_FLAG
  Updated parameters to init_tree()
include/myisamchk.h:
  Added malloc_flags to allow one to use MY_THREAD_SPECIFIC for checks.
include/mysql.h:
  Added MYSQL_THREAD_SPECIFIC_MALLOC
  Used 'unused1' to mark memory as thread specific.
include/mysql.h.pp:
  Updated file
include/mysql_com.h:
  Used 'unused1' to mark memory as thread specific.
  Updated parameters for my_net_init()
libmysql/libmysql.c:
  Updated call to init_alloc_root() to mark memory thread specific.
libmysqld/emb_qcache.cc:
  Updated call to init_alloc_root()
libmysqld/lib_sql.cc:
  Updated call to init_alloc_root()
mysql-test/r/create.result:
  Updated results
mysql-test/r/user_var.result:
  Updated results
mysql-test/suite/funcs_1/datadict/processlist_priv.inc:
  Update to handle new format of SHOW PROCESSLIST
mysql-test/suite/funcs_1/datadict/processlist_val.inc:
  Update to handle new format of SHOW PROCESSLIST
mysql-test/suite/funcs_1/r/is_columns_is.result:
  Update to handle new format of SHOW PROCESSLIST
mysql-test/suite/funcs_1/r/processlist_priv_no_prot.result:
  Updated results
mysql-test/suite/funcs_1/r/processlist_val_no_prot.result:
  Updated results
mysql-test/t/show_explain.test:
  Fixed usage of debug variable so that one can run test with --debug
mysql-test/t/user_var.test:
  Added test of memory_usage variable.
mysys/array.c:
  Added extra my_flags option to init_dynamic_array() and init_dynamic_array2() so that one can mark memory with MY_THREAD_SPECIFIC
  All allocated memory is marked with the given my_flags.
  Removed obsolete function init_dynamic_array()
mysys/default.c:
  Updated call to init_alloc_root()
  Updated call to my_init_dynamic_array()
mysys/hash.c:
  Updated call to my_init_dynamic_array_ci().
  Allocated memory is marked with MY_THREAD_SPECIFIC if HASH_THREAD_SPECIFIC is used.
mysys/ma_dyncol.c:
  init_dynamic_array() -> my_init_dynamic_array()
  Added #if to get rid of compiler warnings
mysys/mf_tempdir.c:
  Updated call to my_init_dynamic_array()
mysys/my_alloc.c:
  Added extra parameter to init_alloc_root() so that one can mark memory with MY_THREAD_SPECIFIC
  Extend MEM_ROOT with a flag if memory is thread specific.
  This is stored in block_size, to keep the size of the MEM_ROOT object identical as before.
  Allocated memory is marked with MY_THREAD_SPECIFIC if used with init_alloc_root()
mysys/my_chmod.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_chsize.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_copy.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_create.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_delete.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_error.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_fopen.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_fstream.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_getwd.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_lib.c:
  Updated call to init_alloc_root()
  Updated call to my_init_dynamic_array()
  Updated DBUG_PRINT because of change of myf type
mysys/my_lock.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_malloc.c:
  Store at start of each allocated memory block the size of the block and if the block is thread specific.
  Call malloc_size_cb_func, if set, with the memory allocated/freed.
  Updated DBUG_PRINT because of change of myf type
mysys/my_open.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_pread.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_read.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_redel.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_rename.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_seek.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_sync.c:
  Updated DBUG_PRINT because of change of myf type
mysys/my_thr_init.c:
  Ensure that one can call my_thread_dbug_id() even if thread is not properly initialized.
mysys/my_write.c:
  Updated DBUG_PRINT because of change of myf type
mysys/mysys_priv.h:
  Updated parameters to sf_malloc and sf_realloc()
mysys/safemalloc.c:
  Added checking that for memory marked with MY_THREAD_SPECIFIC that it's the same thread that is allocation and freeing the memory.
  Added sf_malloc_dbug_id() to allow MariaDB to specify which THD is handling the memory.
  Added my_flags arguments to sf_malloc() and sf_realloc() to be able to mark memory with MY_THREAD_SPECIFIC.
  Added sf_report_leaked_memory() to get list of memory not freed by a thread.
mysys/tree.c:
  Added flags to init_tree() so that one can mark tree to be thread specific.
  Removed with_delete option to init_tree(). Now one should instead use MY_TREE_WITH_DELETE_FLAG.
  Updated call to init_alloc_root()
  All allocated memory is marked with the given malloc flags
mysys/waiting_threads.c:
  Updated call to my_init_dynamic_array()
sql-common/client.c:
  Updated call to init_alloc_root() and my_net_init() to mark memory thread specific.
  Updated call to my_init_dynamic_array().
  Added MYSQL_THREAD_SPECIFIC_MALLOC so that client can mark memory as MY_THREAD_SPECIFIC.
sql-common/client_plugin.c:
  Updated call to init_alloc_root()
sql/debug_sync.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
sql/event_scheduler.cc:
  Removed calls to net_end() as this is now done in ~THD()
  Call set_current_thd() to ensure that memory is assigned to right thread.
sql/events.cc:
  my_pthread_setspecific_ptr(THR_THD,...) -> set_current_thd()
sql/filesort.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
sql/filesort_utils.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
sql/ha_ndbcluster.cc:
  Updated call to init_alloc_root()
  Updated call to my_net_init()
  Removed calls to net_end() and thd->cleanup() as these are now done in ~THD()
sql/ha_ndbcluster_binlog.cc:
  Updated call to my_net_init()
  Updated call to init_sql_alloc()
  Removed calls to net_end() and thd->cleanup() as these are now done in ~THD()
sql/ha_partition.cc:
  Updated call to init_alloc_root()
sql/handler.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
  Added missing call to my_dir_end()
sql/item_func.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
sql/item_subselect.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
sql/item_sum.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
sql/log.cc:
  More DBUG
  Updated call to init_alloc_root()
sql/mdl.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
sql/mysqld.cc:
  Added total_memory_used
  Updated call to init_alloc_root()
  Move mysql_cond_broadcast() before my_thread_end()
  Added mariadb_dbug_id() to count memory per THD instead of per thread.
  Added my_malloc_size_cb_func() callback function for my_malloc() to count memory.
  Move initialization of mysqld_server_started and mysqld_server_initialized earlier.
  Updated call to my_init_dynamic_array().
  Updated call to my_net_init().
  Call my_pthread_setspecific_ptr(THR_THD,...) to ensure that memory is assigned to right thread.
  Added status variable 'memory_used'.
  Updated call to init_alloc_root()
  my_pthread_setspecific_ptr(THR_THD,...) -> set_current_thd()
sql/mysqld.h:
  Added set_current_thd()
sql/net_serv.cc:
  Added new parameter to my_net_init() so that one can mark memory with MY_THREAD_SPECIFIC.
  Store in net->thread_specific_malloc if memory is thread specific.
  Mark memory to be thread specific if requested.
sql/opt_range.cc:
  Updated call to my_init_dynamic_array()
  Updated call to init_sql_alloc()
  Added MY_THREAD_SPECIFIC to allocated memory.
sql/opt_subselect.cc:
  Updated call to init_sql_alloc() to mark memory thread specific.
sql/protocol.cc:
  Fixed compiler warning
sql/records.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
sql/rpl_filter.cc:
  Updated call to my_init_dynamic_array()
sql/rpl_handler.cc:
  Updated call to my_init_dynamic_array2()
sql/rpl_handler.h:
  Updated call to init_sql_alloc()
sql/rpl_mi.cc:
  Updated call to my_init_dynamic_array()
sql/rpl_tblmap.cc:
  Updated call to init_alloc_root()
sql/rpl_utility.cc:
  Updated call to my_init_dynamic_array()
sql/slave.cc:
  Initialize things properly before calling functions that allocate memory.
  Removed calls to net_end() as this is now done in ~THD()
sql/sp_head.cc:
  Updated call to init_sql_alloc()
  Updated call to my_init_dynamic_array()
  Added parameter to warning_info() that it should be fully initialized.
sql/sp_pcontext.cc:
  Updated call to my_init_dynamic_array()
sql/sql_acl.cc:
  Updated call to init_sql_alloc()
  Updated call to my_init_dynamic_array()
  my_pthread_setspecific_ptr(THR_THD,...) -> set_current_thd()
sql/sql_admin.cc:
  Added parameter to warning_info() that it should be fully initialized.
sql/sql_analyse.h:
  Updated call to init_tree() to mark memory thread specific.
sql/sql_array.h:
  Updated call to my_init_dynamic_array() to mark memory thread specific.
sql/sql_audit.cc:
  Updated call to my_init_dynamic_array()
sql/sql_base.cc:
  Updated call to init_sql_alloc()
  my_pthread_setspecific_ptr(THR_THD,...) -> set_current_thd()
sql/sql_cache.cc:
  Updated comment
sql/sql_class.cc:
  Added parameter to warning_info() that not initialize it until THD is fully created.
  Updated call to init_sql_alloc()
  Mark THD::user_vars has to be thread specific.
  Updated call to my_init_dynamic_array()
  Ensure that memory allocated by THD is assigned to the THD.
  More DBUG
  Always acll net_end() in ~THD()
  Assert that all memory signed to this THD is really deleted at ~THD.
  Fixed set_status_var_init() to not reset memory_used.
  my_pthread_setspecific_ptr(THR_THD,...) -> set_current_thd()
sql/sql_class.h:
  Added MY_THREAD_SPECIFIC to allocated memory.
  Added malloc_size to THD to record allocated memory per THD.
sql/sql_delete.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
sql/sql_error.cc:
  Added 'initialize' parameter to Warning_info() to say if should allocate memory for it's structures.
  This is used by THD::THD() to not allocate memory until THD is ready.
  Added Warning_info::free_memory()
sql/sql_error.h:
  Updated Warning_info() class.
sql/sql_handler.cc:
  Updated call to init_alloc_root() to mark memory thread specific.
sql/sql_insert.cc:
  More DBUG
sql/sql_join_cache.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
sql/sql_lex.cc:
  Updated call to my_init_dynamic_array()
sql/sql_lex.h:
  Updated call to my_init_dynamic_array()
sql/sql_load.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
sql/sql_parse.cc:
  Removed calls to net_end() and thd->cleanup() as these are now done in ~THD()
  Ensure that examined_row_count() is reset before query.
  Fixed bug where kill_threads_for_user() was using the wrong mem_root to allocate memory.
  my_pthread_setspecific_ptr(THR_THD,...) -> set_current_thd()
  Don't restore thd->status_var.memory_used when restoring thd->status_var
sql/sql_plugin.cc:
  Updated call to init_alloc_root()
  Updated call to my_init_dynamic_array()
  Don't allocate THD on the stack, as this causes problems with valgrind when doing thd memory counting.
  my_pthread_setspecific_ptr(THR_THD,...) -> set_current_thd()
sql/sql_prepare.cc:
  Added parameter to warning_info() that it should be fully initialized.
  Updated call to init_sql_alloc() to mark memory thread specific.
sql/sql_reload.cc:
  my_pthread_setspecific_ptr(THR_THD,...) -> set_current_thd()
sql/sql_select.cc:
  Updated call to my_init_dynamic_array() and init_sql_alloc() to mark memory thread specific.
  Added MY_THREAD_SPECIFIC to allocated memory.
  More DBUG
sql/sql_servers.cc:
  Updated call to init_sql_alloc() to mark memory some memory thread specific.
  my_pthread_setspecific_ptr(THR_THD,...) -> set_current_thd()
sql/sql_show.cc:
  Updated call to my_init_dynamic_array()
  Mark my_dir() memory thread specific.
  Use my_pthread_setspecific_ptr(THR_THD,...) to mark that allocated memory should be allocated to calling thread.
  More DBUG.
  Added malloc_size and examined_row_count to SHOW PROCESSLIST.
  Added MY_THREAD_SPECIFIC to allocated memory.
  Updated call to init_sql_alloc()
  Added parameter to warning_info() that it should be fully initialized.
sql/sql_statistics.cc:
  Fixed compiler warning
sql/sql_string.cc:
  String elements can now be marked as thread specific.
sql/sql_string.h:
  String elements can now be marked as thread specific.
sql/sql_table.cc:
  Updated call to init_sql_alloc() and my_malloc() to mark memory thread specific
  my_pthread_setspecific_ptr(THR_THD,...) -> set_current_thd()
  Fixed compiler warning
sql/sql_test.cc:
  Updated call to my_init_dynamic_array() to mark memory thread specific.
sql/sql_trigger.cc:
  Updated call to init_sql_alloc()
sql/sql_udf.cc:
  Updated call to init_sql_alloc()
  my_pthread_setspecific_ptr(THR_THD,...) -> set_current_thd()
sql/sql_update.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
sql/table.cc:
  Updated call to init_sql_alloc().
  Mark memory used by temporary tables, that are not for slave threads, as MY_THREAD_SPECIFIC
  Updated call to init_sql_alloc()
sql/thr_malloc.cc:
  Added my_flags argument to init_sql_alloc() to be able to mark memory as MY_THREAD_SPECIFIC.
sql/thr_malloc.h:
  Updated prototype for init_sql_alloc()
sql/tztime.cc:
  Updated call to init_sql_alloc()
  Updated call to init_alloc_root() to mark memory thread specific.
  my_pthread_setspecific_ptr(THR_THD,...) -> set_current_thd()
sql/uniques.cc:
  Updated calls to init_tree(), my_init_dynamic_array() and my_malloc() to mark memory thread specific.
sql/unireg.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
storage/csv/ha_tina.cc:
  Updated call to init_alloc_root()
storage/federated/ha_federated.cc:
  Updated call to init_alloc_root()
  Updated call to my_init_dynamic_array()
  Ensure that memory allocated by fedarated is registered for the system, not for the thread.
storage/federatedx/federatedx_io_mysql.cc:
  Updated call to my_init_dynamic_array()
storage/federatedx/ha_federatedx.cc:
  Updated call to init_alloc_root()
  Updated call to my_init_dynamic_array()
storage/heap/ha_heap.cc:
  Added MY_THREAD_SPECIFIC to allocated memory.
storage/heap/heapdef.h:
  Added parameter to hp_get_new_block() to be able to do thread specific memory tagging.
storage/heap/hp_block.c:
  Added parameter to hp_get_new_block() to be able to do thread specific memory tagging.
storage/heap/hp_create.c:
  - Internal HEAP tables are now marking it's memory as MY_THREAD_SPECIFIC.
  - Use MY_TREE_WITH_DELETE instead of removed option 'with_delete'.
storage/heap/hp_open.c:
  Internal HEAP tables are now marking it's memory as MY_THREAD_SPECIFIC.
storage/heap/hp_write.c:
  Added new parameter to hp_get_new_block()
storage/maria/ma_bitmap.c:
  Updated call to my_init_dynamic_array()
storage/maria/ma_blockrec.c:
  Updated call to my_init_dynamic_array()
storage/maria/ma_check.c:
  Updated call to init_alloc_root()
storage/maria/ma_ft_boolean_search.c:
  Updated calls to init_tree() and init_alloc_root()
storage/maria/ma_ft_nlq_search.c:
  Updated call to init_tree()
storage/maria/ma_ft_parser.c:
  Updated call to init_tree()
  Updated call to init_alloc_root()
storage/maria/ma_loghandler.c:
  Updated call to my_init_dynamic_array()
storage/maria/ma_open.c:
  Updated call to my_init_dynamic_array()
storage/maria/ma_sort.c:
  Updated call to my_init_dynamic_array()
storage/maria/ma_write.c:
  Updated calls to my_init_dynamic_array() and init_tree()
storage/maria/maria_pack.c:
  Updated call to init_tree()
storage/maria/unittest/sequence_storage.c:
  Updated call to my_init_dynamic_array()
storage/myisam/ft_boolean_search.c:
  Updated call to init_tree()
  Updated call to init_alloc_root()
storage/myisam/ft_nlq_search.c:
  Updated call to init_tree()
storage/myisam/ft_parser.c:
  Updated call to init_tree()
  Updated call to init_alloc_root()
storage/myisam/ft_stopwords.c:
  Updated call to init_tree()
storage/myisam/mi_check.c:
  Updated call to init_alloc_root()
storage/myisam/mi_write.c:
  Updated call to my_init_dynamic_array()
  Updated call to init_tree()
storage/myisam/myisamlog.c:
  Updated call to init_tree()
storage/myisam/myisampack.c:
  Updated call to init_tree()
storage/myisam/sort.c:
  Updated call to my_init_dynamic_array()
storage/myisammrg/ha_myisammrg.cc:
  Updated call to init_sql_alloc()
storage/perfschema/pfs_check.cc:
  Rest current_thd
storage/perfschema/pfs_instr.cc:
  Removed DBUG_ENTER/DBUG_VOID_RETURN as at this point my_thread_var is not allocated anymore, which can cause problems.
support-files/compiler_warnings.supp:
  Disable compiler warning from offsetof macro.
2013-01-23 16:16:14 +01:00

411 lines
11 KiB
C

/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
#include "ftdefs.h"
typedef struct st_ft_docstat {
FT_WORD *list;
uint uniq;
double sum;
} FT_DOCSTAT;
typedef struct st_my_ft_parser_param
{
TREE *wtree;
MEM_ROOT *mem_root;
} MY_FT_PARSER_PARAM;
static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2)
{
return ha_compare_text(cs, (uchar*) w1->pos, w1->len,
(uchar*) w2->pos, w2->len, 0, 0);
}
static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat)
{
word->weight=LWS_IN_USE;
docstat->sum+=word->weight;
memcpy((docstat->list)++, word, sizeof(FT_WORD));
return 0;
}
/* transforms tree of words into the array, applying normalization */
FT_WORD * ft_linearize(TREE *wtree, MEM_ROOT *mem_root)
{
FT_WORD *wlist,*p;
FT_DOCSTAT docstat;
DBUG_ENTER("ft_linearize");
if ((wlist=(FT_WORD *) alloc_root(mem_root, sizeof(FT_WORD)*
(1+wtree->elements_in_tree))))
{
docstat.list=wlist;
docstat.uniq=wtree->elements_in_tree;
docstat.sum=0;
tree_walk(wtree,(tree_walk_action)&walk_and_copy,&docstat,left_root_right);
}
delete_tree(wtree);
if (!wlist)
DBUG_RETURN(NULL);
docstat.list->pos=NULL;
for (p=wlist;p->pos;p++)
{
p->weight=PRENORM_IN_USE;
}
for (p=wlist;p->pos;p++)
{
p->weight/=NORM_IN_USE;
}
DBUG_RETURN(wlist);
}
my_bool ft_boolean_check_syntax_string(const uchar *str)
{
uint i, j;
if (!str ||
(strlen((char*) str)+1 != sizeof(DEFAULT_FTB_SYNTAX)) ||
(str[0] != ' ' && str[1] != ' '))
return 1;
for (i=0; i<sizeof(DEFAULT_FTB_SYNTAX); i++)
{
/* limiting to 7-bit ascii only */
if ((unsigned char)(str[i]) > 127 || my_isalnum(default_charset_info, str[i]))
return 1;
for (j=0; j<i; j++)
if (str[i] == str[j] && (i != 11 || j != 10))
return 1;
}
return 0;
}
/*
RETURN VALUE
0 - eof
1 - word found
2 - left bracket
3 - right bracket
4 - stopword found
*/
uchar ft_get_word(CHARSET_INFO *cs, const uchar **start, const uchar *end,
FT_WORD *word, MYSQL_FTPARSER_BOOLEAN_INFO *param)
{
const uchar *doc=*start;
int ctype;
uint mwc, length;
int mbl;
param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0);
param->weight_adjust= param->wasign= 0;
param->type= FT_TOKEN_EOF;
while (doc<end)
{
for (; doc < end; doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
{
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
if (true_word_char(ctype, *doc))
break;
if (*doc == FTB_RQUOT && param->quot)
{
*start=doc+1;
param->type= FT_TOKEN_RIGHT_PAREN;
goto ret;
}
if (!param->quot)
{
if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT)
{
/* param->prev=' '; */
*start=doc+1;
if (*doc == FTB_LQUOT)
param->quot= (char*) 1;
param->type= (*doc == FTB_RBR ? FT_TOKEN_RIGHT_PAREN : FT_TOKEN_LEFT_PAREN);
goto ret;
}
if (param->prev == ' ')
{
if (*doc == FTB_YES ) { param->yesno=+1; continue; } else
if (*doc == FTB_EGAL) { param->yesno= 0; continue; } else
if (*doc == FTB_NO ) { param->yesno=-1; continue; } else
if (*doc == FTB_INC ) { param->weight_adjust++; continue; } else
if (*doc == FTB_DEC ) { param->weight_adjust--; continue; } else
if (*doc == FTB_NEG ) { param->wasign= !param->wasign; continue; }
}
}
param->prev=*doc;
param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0);
param->weight_adjust= param->wasign= 0;
}
mwc=length=0;
for (word->pos= doc; doc < end; length++,
doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
{
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
if (true_word_char(ctype, *doc))
mwc=0;
else if (!misc_word_char(*doc) || mwc)
break;
else
mwc++;
}
param->prev='A'; /* be sure *prev is true_word_char */
word->len= (uint)(doc-word->pos) - mwc;
if ((param->trunc=(doc<end && *doc == FTB_TRUNC)))
doc++;
if (((length >= ft_min_word_len && !is_stopword((char*) word->pos,
word->len))
|| param->trunc) && length < ft_max_word_len)
{
*start=doc;
param->type= FT_TOKEN_WORD;
goto ret;
}
else if (length) /* make sure length > 0 (if start contains spaces only) */
{
*start= doc;
param->type= FT_TOKEN_STOPWORD;
goto ret;
}
}
if (param->quot)
{
*start= doc;
param->type= 3; /* FT_RBR */
goto ret;
}
ret:
return param->type;
}
uchar ft_simple_get_word(CHARSET_INFO *cs, uchar **start, const uchar *end,
FT_WORD *word, my_bool skip_stopwords)
{
uchar *doc= *start;
uint mwc, length;
int mbl;
int ctype;
DBUG_ENTER("ft_simple_get_word");
do
{
for (;; doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
{
if (doc >= end)
DBUG_RETURN(0);
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
if (true_word_char(ctype, *doc))
break;
}
mwc= length= 0;
for (word->pos= doc; doc < end; length++,
doc+= (mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1)))
{
mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end);
if (true_word_char(ctype, *doc))
mwc= 0;
else if (!misc_word_char(*doc) || mwc)
break;
else
mwc++;
}
word->len= (uint)(doc-word->pos) - mwc;
if (skip_stopwords == FALSE ||
(length >= ft_min_word_len && length < ft_max_word_len &&
!is_stopword((char*) word->pos, word->len)))
{
*start= doc;
DBUG_RETURN(1);
}
} while (doc < end);
DBUG_RETURN(0);
}
void ft_parse_init(TREE *wtree, CHARSET_INFO *cs)
{
DBUG_ENTER("ft_parse_init");
if (!is_tree_inited(wtree))
init_tree(wtree, 0, 0, sizeof(FT_WORD), (qsort_cmp2)&FT_WORD_cmp, 0,
(void*)cs, 0);
DBUG_VOID_RETURN;
}
static int ft_add_word(MYSQL_FTPARSER_PARAM *param,
const char *word, int word_len,
MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
{
TREE *wtree;
FT_WORD w;
MY_FT_PARSER_PARAM *ft_param=param->mysql_ftparam;
DBUG_ENTER("ft_add_word");
wtree= ft_param->wtree;
if (param->flags & MYSQL_FTFLAGS_NEED_COPY)
{
uchar *ptr;
DBUG_ASSERT(wtree->with_delete == 0);
ptr= (uchar *)alloc_root(ft_param->mem_root, word_len);
memcpy(ptr, word, word_len);
w.pos= ptr;
}
else
w.pos= (uchar*) word;
w.len= word_len;
if (!tree_insert(wtree, &w, 0, wtree->custom_arg))
{
delete_tree(wtree);
DBUG_RETURN(1);
}
DBUG_RETURN(0);
}
static int ft_parse_internal(MYSQL_FTPARSER_PARAM *param,
const char *doc_arg, int doc_len)
{
uchar *doc= (uchar*) doc_arg;
uchar *end= doc + doc_len;
MY_FT_PARSER_PARAM *ft_param=param->mysql_ftparam;
TREE *wtree= ft_param->wtree;
FT_WORD w;
DBUG_ENTER("ft_parse_internal");
while (ft_simple_get_word(wtree->custom_arg, &doc, end, &w, TRUE))
if (param->mysql_add_word(param, (char*) w.pos, w.len, 0))
DBUG_RETURN(1);
DBUG_RETURN(0);
}
int ft_parse(TREE *wtree, uchar *doc, int doclen,
struct st_mysql_ftparser *parser,
MYSQL_FTPARSER_PARAM *param, MEM_ROOT *mem_root)
{
MY_FT_PARSER_PARAM my_param;
DBUG_ENTER("ft_parse");
DBUG_ASSERT(parser);
my_param.wtree= wtree;
my_param.mem_root= mem_root;
param->mysql_parse= ft_parse_internal;
param->mysql_add_word= ft_add_word;
param->mysql_ftparam= &my_param;
param->cs= wtree->custom_arg;
param->doc= (char*) doc;
param->length= doclen;
param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
DBUG_RETURN(parser->parse(param));
}
#define MAX_PARAM_NR 2
MYSQL_FTPARSER_PARAM* ftparser_alloc_param(MI_INFO *info)
{
if (!info->ftparser_param)
{
/*
. info->ftparser_param can not be zero after the initialization,
because it always includes built-in fulltext parser. And built-in
parser can be called even if the table has no fulltext indexes and
no varchar/text fields.
ftb_find_relevance... parser (ftb_find_relevance_parse,
ftb_find_relevance_add_word) calls ftb_check_phrase... parser
(ftb_check_phrase_internal, ftb_phrase_add_word). Thus MAX_PARAM_NR=2.
*/
info->ftparser_param= (MYSQL_FTPARSER_PARAM *)
my_malloc(MAX_PARAM_NR * sizeof(MYSQL_FTPARSER_PARAM) *
info->s->ftkeys, MYF(MY_WME | MY_ZEROFILL));
init_alloc_root(&info->ft_memroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0, 0);
}
return info->ftparser_param;
}
MYSQL_FTPARSER_PARAM *ftparser_call_initializer(MI_INFO *info,
uint keynr, uint paramnr)
{
uint32 ftparser_nr;
struct st_mysql_ftparser *parser;
if (!ftparser_alloc_param(info))
return 0;
if (keynr == NO_SUCH_KEY)
{
ftparser_nr= 0;
parser= &ft_default_parser;
}
else
{
ftparser_nr= info->s->keyinfo[keynr].ftkey_nr;
parser= info->s->keyinfo[keynr].parser;
}
DBUG_ASSERT(paramnr < MAX_PARAM_NR);
ftparser_nr= ftparser_nr*MAX_PARAM_NR + paramnr;
if (! info->ftparser_param[ftparser_nr].mysql_add_word)
{
/* Note, that mysql_add_word is used here as a flag:
mysql_add_word == 0 - parser is not initialized
mysql_add_word != 0 - parser is initialized, or no
initialization needed. */
info->ftparser_param[ftparser_nr].mysql_add_word=
(int (*)(struct st_mysql_ftparser_param *, const char *, int,
MYSQL_FTPARSER_BOOLEAN_INFO *)) 1;
if (parser->init && parser->init(&info->ftparser_param[ftparser_nr]))
return 0;
}
return &info->ftparser_param[ftparser_nr];
}
void ftparser_call_deinitializer(MI_INFO *info)
{
uint i, j, keys= info->s->state.header.keys;
free_root(&info->ft_memroot, MYF(0));
if (! info->ftparser_param)
return;
for (i= 0; i < keys; i++)
{
MI_KEYDEF *keyinfo= &info->s->keyinfo[i];
for (j=0; j < MAX_PARAM_NR; j++)
{
MYSQL_FTPARSER_PARAM *ftparser_param=
&info->ftparser_param[keyinfo->ftkey_nr * MAX_PARAM_NR + j];
if (keyinfo->flag & HA_FULLTEXT && ftparser_param->mysql_add_word)
{
if (keyinfo->parser->deinit)
keyinfo->parser->deinit(ftparser_param);
ftparser_param->mysql_add_word= 0;
}
else
break;
}
}
}