mirror of
https://github.com/MariaDB/server.git
synced 2025-01-17 20:42:30 +01:00
46922b5125
storage/maria/Makefile.am: GPL license update storage/maria/ft_maria.c: GPL license update storage/maria/ha_maria.cc: GPL license update storage/maria/ha_maria.h: GPL license update storage/maria/lockman.c: GPL license update storage/maria/lockman.h: GPL license update storage/maria/ma_bitmap.c: GPL license update storage/maria/ma_blockrec.c: GPL license update storage/maria/ma_blockrec.h: GPL license update storage/maria/ma_cache.c: GPL license update storage/maria/ma_changed.c: GPL license update storage/maria/ma_check.c: GPL license update storage/maria/ma_checkpoint.c: GPL license update storage/maria/ma_checkpoint.h: GPL license update storage/maria/ma_checksum.c: GPL license update storage/maria/ma_close.c: GPL license update storage/maria/ma_control_file.c: GPL license update storage/maria/ma_control_file.h: GPL license update storage/maria/ma_create.c: GPL license update storage/maria/ma_dbug.c: GPL license update storage/maria/ma_delete.c: GPL license update storage/maria/ma_delete_all.c: GPL license update storage/maria/ma_delete_table.c: GPL license update storage/maria/ma_dynrec.c: GPL license update storage/maria/ma_extra.c: GPL license update storage/maria/ma_ft_boolean_search.c: GPL license update storage/maria/ma_ft_eval.c: GPL license update storage/maria/ma_ft_eval.h: GPL license update storage/maria/ma_ft_nlq_search.c: GPL license update storage/maria/ma_ft_parser.c: GPL license update storage/maria/ma_ft_stem.c: GPL license update storage/maria/ma_ft_test1.c: GPL license update storage/maria/ma_ft_test1.h: GPL license update storage/maria/ma_ft_update.c: GPL license update storage/maria/ma_ftdefs.h: GPL license update storage/maria/ma_fulltext.h: GPL license update storage/maria/ma_info.c: GPL license update storage/maria/ma_init.c: GPL license update storage/maria/ma_key.c: GPL license update storage/maria/ma_keycache.c: GPL license update storage/maria/ma_least_recently_dirtied.c: GPL license update storage/maria/ma_least_recently_dirtied.h: 
GPL license update storage/maria/ma_locking.c: GPL license update storage/maria/ma_open.c: GPL license update storage/maria/ma_packrec.c: GPL license update storage/maria/ma_page.c: GPL license update storage/maria/ma_panic.c: GPL license update storage/maria/ma_preload.c: GPL license update storage/maria/ma_range.c: GPL license update storage/maria/ma_recovery.c: GPL license update storage/maria/ma_recovery.h: GPL license update storage/maria/ma_rename.c: GPL license update storage/maria/ma_rfirst.c: GPL license update storage/maria/ma_rkey.c: GPL license update storage/maria/ma_rlast.c: GPL license update storage/maria/ma_rnext.c: GPL license update storage/maria/ma_rnext_same.c: GPL license update storage/maria/ma_rprev.c: GPL license update storage/maria/ma_rrnd.c: GPL license update storage/maria/ma_rsame.c: GPL license update storage/maria/ma_rsamepos.c: GPL license update storage/maria/ma_rt_index.c: GPL license update storage/maria/ma_rt_index.h: GPL license update storage/maria/ma_rt_key.c: GPL license update storage/maria/ma_rt_key.h: GPL license update storage/maria/ma_rt_mbr.c: GPL license update storage/maria/ma_rt_mbr.h: GPL license update storage/maria/ma_rt_split.c: GPL license update storage/maria/ma_rt_test.c: GPL license update storage/maria/ma_scan.c: GPL license update storage/maria/ma_search.c: GPL license update storage/maria/ma_sort.c: GPL license update storage/maria/ma_sp_defs.h: GPL license update storage/maria/ma_sp_key.c: GPL license update storage/maria/ma_sp_test.c: GPL license update storage/maria/ma_static.c: GPL license update storage/maria/ma_statrec.c: GPL license update storage/maria/ma_test1.c: GPL license update storage/maria/ma_test2.c: GPL license update storage/maria/ma_test3.c: GPL license update storage/maria/ma_unique.c: GPL license update storage/maria/ma_update.c: GPL license update storage/maria/ma_write.c: GPL license update storage/maria/maria_chk.c: GPL license update storage/maria/maria_def.h: GPL license update 
storage/maria/maria_ftdump.c: GPL license update storage/maria/maria_pack.c: GPL license update storage/maria/tablockman.c: GPL license update storage/maria/tablockman.h: GPL license update storage/maria/trnman.c: GPL license update storage/maria/trnman.h: GPL license update
152 lines
6.2 KiB
C
152 lines
6.2 KiB
C
/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
|
|
/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
|
|
|
|
/* some definitions for full-text indices */
|
|
|
|
#include "ma_fulltext.h"
|
|
#include <m_ctype.h>
|
|
#include <my_tree.h>
|
|
#include <queues.h>
|
|
#include <mysql/plugin.h>
|
|
|
|
/*
  Word-character classification helpers.

  true_word_char(ctype, character)
    Non-zero when the ctype bit mask has any of the _MY_U, _MY_L or
    _MY_NMR bits set, or when character is an underscore.
    Each argument is evaluated at most once.

  misc_word_char(X)
    Always 0; the argument is ignored.
*/
#define true_word_char(ctype, character) \
                      (((ctype) & (_MY_U | _MY_L | _MY_NMR)) || \
                       ((character) == '_'))
#define misc_word_char(X)       0
|
|
|
|
/*
  Full-text tuning constants.
  NOTE(review): meanings below are inferred from the names; confirm
  against the code that uses them.
*/
/* presumably the longest word length handled when sorting */
#define FT_MAX_WORD_LEN_FOR_SORT 31

/* presumably the MEM_ROOT block size used by the full-text parser */
#define FTPARSER_MEMROOT_ALLOC_SIZE 65536

/* defined (without a value) as a compile-time feature switch */
#define COMPILE_STOPWORDS_IN
|
|
|
|
/* Interested readers may consult SMART
   (ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z)
   for an excellent implementation of the vector space model we use.
   It also demonstrates the usage of different weighting techniques.
   This code, though, is completely original and is not based on the
   SMART code but was in some cases inspired by it.

   NORM_PIVOT was taken from the article
   A.Singhal, C.Buckley, M.Mitra, "Pivoted Document Length Normalization",
   ACM SIGIR'96, 21-29, 1996
 */
|
|
|
|
/*
  Selection of the weighting formulas actually used.
  Each alias expands to one of the LWS_*, PRENORM_*, NORM_* or GWS_*
  macros defined in this header; change the right-hand side to switch
  formula.
*/
#define LWS_FOR_QUERY   LWS_TF          /* local weight for query words */
#define LWS_IN_USE      LWS_LOG         /* local weight */
#define PRENORM_IN_USE  PRENORM_AVG     /* pre-normalization */
#define NORM_IN_USE     NORM_PIVOT      /* normalization */
#define GWS_IN_USE      GWS_PROB        /* global weight */
|
|
/*==============================================================*/
|
|
/*
  LWS_* formulas.
  All of them read a variable named "count" that must be in scope where
  the macro is expanded. LWS_LOG calls log() and therefore needs <math.h>.
*/
#define LWS_TF          (count)                 /* the count itself */
#define LWS_BINARY      (count>0)               /* 1 if count is positive */
#define LWS_SQUARE      (count*count)           /* count squared */
#define LWS_LOG         (count?(log( (double) count)+1):0) /* 0 for count 0 */
|
|
/*--------------------------------------------------------------*/
|
|
/*
  PRENORM_* formulas.
  They read "p" (a pointer with a "weight" member) and "docstat"
  (an object with "max", "sum" and "uniq" members) from the expansion
  site. PRENORM_AVGLOG calls log() and therefore needs <math.h>.
*/
#define PRENORM_NONE    (p->weight)
#define PRENORM_MAX     (p->weight/docstat.max)
#define PRENORM_AUG     (0.4+0.6*p->weight/docstat.max)
/* evaluated left to right: (weight/sum)*uniq */
#define PRENORM_AVG     (p->weight/docstat.sum*docstat.uniq)
#define PRENORM_AVGLOG  ((1+log(p->weight))/(1+log(docstat.sum/docstat.uniq)))
|
|
/*--------------------------------------------------------------*/
|
|
/*
  NORM_* formulas.
  NORM_SUM, NORM_COS and NORM_PIVOT read "docstat" (members "nsum",
  "nsum2" and "uniq") from the expansion site. NORM_COS calls sqrt()
  and therefore needs <math.h>.
*/
#define NORM_NONE       (1)
#define NORM_SUM        (docstat.nsum)
#define NORM_COS        (sqrt(docstat.nsum2))

/* Coefficient applied to docstat.uniq by NORM_PIVOT */
#define PIVOT_VAL       (0.0115)
#define NORM_PIVOT      (1+PIVOT_VAL*docstat.uniq)
|
|
/*---------------------------------------------------------------*/
|
|
/*
  GWS_* formulas.
  They read "sum", "sum2", "suml", "doc_cnt" and/or
  "aio->info->state->records" from the expansion site, and most of them
  call log(), sqrt() or pow(), so <math.h> is required.
*/
#define GWS_NORM        (1/sqrt(sum2))
#define GWS_GFIDF       (sum/doc_cnt)
/* Mysterious, but w/o (double) GWS_IDF performs better :-o */
#define GWS_IDF         log(aio->info->state->records/doc_cnt)
#define GWS_IDF1        log((double)aio->info->state->records/doc_cnt)
/* 0 when records <= doc_cnt, which keeps the log() argument positive */
#define GWS_PROB        ((aio->info->state->records > doc_cnt) ? \
                         log(((double)(aio->info->state->records-doc_cnt))/doc_cnt) : 0 )
#define GWS_FREQ        (1.0/doc_cnt)
#define GWS_SQUARED     pow(log((double)aio->info->state->records/doc_cnt),2)
#define GWS_CUBIC       pow(log((double)aio->info->state->records/doc_cnt),3)
#define GWS_ENTROPY     (1-(suml/sum-log(sum))/log(aio->info->state->records))
|
|
/*=================================================================*/
|
|
|
|
/* Boolean search operators */
|
|
/*
  Boolean search operator characters.
  Each macro picks one character out of ft_boolean_syntax, which is
  declared outside this header. Index 9 has no macro here; the quote
  macros use indexes 10 and 11.
*/
#define FTB_YES   (ft_boolean_syntax[0])
#define FTB_EGAL  (ft_boolean_syntax[1])
#define FTB_NO    (ft_boolean_syntax[2])
#define FTB_INC   (ft_boolean_syntax[3])
#define FTB_DEC   (ft_boolean_syntax[4])
#define FTB_LBR   (ft_boolean_syntax[5])
#define FTB_RBR   (ft_boolean_syntax[6])
#define FTB_NEG   (ft_boolean_syntax[7])
#define FTB_TRUNC (ft_boolean_syntax[8])
#define FTB_LQUOT (ft_boolean_syntax[10])
#define FTB_RQUOT (ft_boolean_syntax[11])
|
|
|
|
typedef struct st_maria_ft_word {
|
|
byte * pos;
|
|
uint len;
|
|
double weight;
|
|
} FT_WORD;
|
|
|
|
int is_stopword(char *word, uint len);
|
|
|
|
uint _ma_ft_make_key(MARIA_HA *, uint , byte *, FT_WORD *, my_off_t);
|
|
|
|
byte maria_ft_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *,
|
|
MYSQL_FTPARSER_BOOLEAN_INFO *);
|
|
byte maria_ft_simple_get_word(CHARSET_INFO *, byte **, const byte *,
|
|
FT_WORD *, my_bool);
|
|
|
|
typedef struct _st_maria_ft_seg_iterator {
|
|
uint num, len;
|
|
HA_KEYSEG *seg;
|
|
const byte *rec, *pos;
|
|
} FT_SEG_ITERATOR;
|
|
|
|
/*
  FT_SEG_ITERATOR API; implemented outside this header.
  The two init functions fill an FT_SEG_ITERATOR supplied by the caller;
  _ma_ft_segiterator() takes the iterator and returns a uint.
  NOTE(review): the meaning of the return value is not visible here.
*/
void _ma_ft_segiterator_init(MARIA_HA *, uint, const byte *, FT_SEG_ITERATOR *);
void _ma_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *);
uint _ma_ft_segiterator(FT_SEG_ITERATOR *);
|
|
|
|
void maria_ft_parse_init(TREE *, CHARSET_INFO *);
|
|
int maria_ft_parse(TREE *, byte *, int, struct st_mysql_ftparser *parser,
|
|
MYSQL_FTPARSER_PARAM *, MEM_ROOT *);
|
|
FT_WORD * maria_ft_linearize(TREE *, MEM_ROOT *);
|
|
FT_WORD * _ma_ft_parserecord(MARIA_HA *, uint, const byte *, MEM_ROOT *);
|
|
uint _ma_ft_parse(TREE *, MARIA_HA *, uint, const byte *,
|
|
MYSQL_FTPARSER_PARAM *, MEM_ROOT *);
|
|
|
|
FT_INFO *maria_ft_init_nlq_search(MARIA_HA *, uint, byte *, uint, uint, byte *);
|
|
FT_INFO *maria_ft_init_boolean_search(MARIA_HA *, uint, byte *, uint, CHARSET_INFO *);
|
|
|
|
extern const struct _ft_vft _ma_ft_vft_nlq;
|
|
int maria_ft_nlq_read_next(FT_INFO *, char *);
|
|
float maria_ft_nlq_find_relevance(FT_INFO *, byte *, uint);
|
|
void maria_ft_nlq_close_search(FT_INFO *);
|
|
float maria_ft_nlq_get_relevance(FT_INFO *);
|
|
my_off_t maria_ft_nlq_get_docid(FT_INFO *);
|
|
void maria_ft_nlq_reinit_search(FT_INFO *);
|
|
|
|
extern const struct _ft_vft _ma_ft_vft_boolean;
|
|
int maria_ft_boolean_read_next(FT_INFO *, char *);
|
|
float maria_ft_boolean_find_relevance(FT_INFO *, byte *, uint);
|
|
void maria_ft_boolean_close_search(FT_INFO *);
|
|
float maria_ft_boolean_get_relevance(FT_INFO *);
|
|
my_off_t maria_ft_boolean_get_docid(FT_INFO *);
|
|
void maria_ft_boolean_reinit_search(FT_INFO *);
|
|
extern MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info,
|
|
uint keynr,
|
|
uint paramnr);
|
|
extern void maria_ftparser_call_deinitializer(MARIA_HA *info);
|