/* Copyright (C) 2005 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/*
  Implementation of trie and Aho-Corasick automaton.
  Supports only charsets that can be compared byte-wise.

  TODO:
    Add character frequencies. Can increase lookup speed
    up to 30%.
    Implement character-wise comparison.
*/

#include "mysys_priv.h"
/*
  NOTE(review): the reviewed copy of this file contained three bare
  `#include' directives with the header names missing. The names below
  are a reconstruction -- confirm them against the source tree.
*/
#include <m_string.h>
#include <my_trie.h>
#include <my_base.h>


/*
  SYNOPSIS
    TRIE *trie_init (TRIE *trie, CHARSET_INFO *charset);

      trie    - `TRIE' object to initialize, or `NULL' to allocate one
      charset - non-NULL charset, stored in the trie

  DESCRIPTION
    Allocates or initializes a `TRIE' object. If `trie' is a `NULL'
    pointer, the function allocates, initializes, and returns a new
    object. Otherwise, the object is initialized and the address of
    the object is returned. A newly allocated object lives inside the
    trie's own memory root, so it is released by `trie_free()'.

  RETURN VALUE
    An initialized `TRIE*' object.
    `NULL' if there was insufficient memory to allocate a new object.
*/

TRIE *trie_init (TRIE *trie, CHARSET_INFO *charset)
{
  MEM_ROOT mem_root;
  DBUG_ENTER("trie_init");
  DBUG_ASSERT(charset);

  /* Both sizes of the memory root are expressed in units of 128 nodes. */
  init_alloc_root(&mem_root,
                  (sizeof(TRIE_NODE) * 128) + ALLOC_ROOT_MIN_BLOCK_SIZE,
                  sizeof(TRIE_NODE) * 128);

  if (! trie)
  {
    trie= (TRIE *)alloc_root(&mem_root, sizeof(TRIE));
    if (! trie)
    {
      free_root(&mem_root, MYF(0));
      DBUG_RETURN(NULL);
    }
  }

  /*
    The root was set up in a local variable because the TRIE itself may
    have been allocated from it; store the final state inside the object.
  */
  memcpy(&trie->mem_root, &mem_root, sizeof(MEM_ROOT));

  trie->root.leaf= 0;
  trie->root.c= 0;
  trie->root.next= NULL;
  trie->root.links= NULL;
  trie->root.fail= NULL;
  trie->charset= charset;
  trie->nnodes= 0;
  trie->nwords= 0;
  DBUG_RETURN(trie);
}


/*
  SYNOPSIS
    void trie_free (TRIE *trie);

      trie - valid pointer to `TRIE'

  DESCRIPTION
    Frees the memory allocated for a `trie'.

  RETURN VALUE
    None.
*/

void trie_free (TRIE *trie)
{
  MEM_ROOT mem_root;
  DBUG_ENTER("trie_free");
  DBUG_ASSERT(trie);

  /*
    Free through a local copy: when trie_init() allocated the object,
    `trie' itself lives inside the memory root being released.
  */
  memcpy(&mem_root, &trie->mem_root, sizeof(MEM_ROOT));
  free_root(&mem_root, MYF(0));
  DBUG_VOID_RETURN;
}


/*
  SYNOPSIS
    my_bool trie_insert (TRIE *trie, const byte *key, uint keylen);

      trie   - valid pointer to `TRIE'
      key    - valid pointer to key to insert
      keylen - non-0 key length

  DESCRIPTION
    Inserts new key into trie. New nodes are appended to the end of
    their parent's child list. The final node of the key gets its
    `leaf' field set to `keylen'.

  RETURN VALUE
    Upon successful completion, `trie_insert' returns `FALSE'. Otherwise
    `TRUE' is returned (a node could not be allocated).

  NOTES
    If this function fails you must assume `trie' is broken.
    However it can be freed with trie_free().

    `nwords' is incremented on every successful call, including a call
    that inserts a key which is already present.
*/

my_bool trie_insert (TRIE *trie, const byte *key, uint keylen)
{
  TRIE_NODE *node;
  uint k;
  DBUG_ENTER("trie_insert");
  DBUG_ASSERT(trie && key && keylen);

  node= &trie->root;
  /*
    Clear the root fail link; ac_trie_prepare() sets it again.
    NOTE(review): presumably this marks the automaton as not prepared.
  */
  trie->root.fail= NULL;

  for (k= 0; k < keylen; k++)
  {
    const byte p= key[k];
    TRIE_NODE *last= NULL;                      /* last child examined */
    TRIE_NODE *next;

    /* Look for a child labelled `p', remembering the tail of the list. */
    for (next= node->links; next; next= next->next)
    {
      if (next->c == p)
        break;
      last= next;
    }

    if (! next)
    {
      TRIE_NODE *tmp= (TRIE_NODE *)alloc_root(&trie->mem_root,
                                              sizeof(TRIE_NODE));
      if (! tmp)
        DBUG_RETURN(TRUE);
      tmp->leaf= 0;
      tmp->c= p;
      tmp->links= tmp->fail= tmp->next= NULL;
      trie->nnodes++;

      /* Append after the tail found above, or start the child list. */
      if (last)
        last->next= tmp;
      else
        node->links= tmp;
      next= tmp;
    }
    node= next;
  }

  node->leaf= keylen;
  trie->nwords++;
  DBUG_RETURN(FALSE);
}


/*
  SYNOPSIS
    my_bool ac_trie_prepare (TRIE *trie);

      trie - valid pointer to `TRIE'

  DESCRIPTION
    Constructs Aho-Corasick automaton: assigns the `fail' link of every
    node, visiting the nodes level by level. A temporary array of
    `nnodes' pointers serves as the queue; every non-root node is
    enqueued exactly once.

  RETURN VALUE
    Upon successful completion, `ac_trie_prepare' returns `FALSE'.
    Otherwise `TRUE' is returned (the temporary array could not be
    allocated).
*/

my_bool ac_trie_prepare (TRIE *trie)
{
  TRIE_NODE **tmp_nodes;
  TRIE_NODE *node;
  uint32 fnode= 0;                              /* queue head */
  uint32 lnode= 0;                              /* queue tail */
  DBUG_ENTER("ac_trie_prepare");
  DBUG_ASSERT(trie);

  if (! trie->nnodes)
  {
    /*
      Only the root exists: nothing to enqueue, so do not depend on how
      the allocator treats a zero-byte request.
    */
    trie->root.fail= &trie->root;
    DBUG_RETURN(FALSE);
  }

  tmp_nodes= (TRIE_NODE **)my_malloc(trie->nnodes * sizeof(TRIE_NODE *),
                                     MYF(0));
  if (! tmp_nodes)
    DBUG_RETURN(TRUE);

  /* The root and all of its children fail back to the root. */
  trie->root.fail= &trie->root;
  for (node= trie->root.links; node; node= node->next)
  {
    node->fail= &trie->root;
    tmp_nodes[lnode++]= node;
  }

  while (fnode < lnode)
  {
    TRIE_NODE *current= tmp_nodes[fnode++];
    for (node= current->links; node; node= node->next)
    {
      TRIE_NODE *fail= current->fail;
      tmp_nodes[lnode++]= node;
      /*
        Follow the parent's chain of fail links until trie_goto() yields
        a node for this character.
        NOTE(review): trie_goto() is defined outside this file; the loop
        assumes it returns non-NULL once `fail' reaches the root -- confirm.
      */
      while (! (node->fail= trie_goto(&trie->root, fail, node->c)))
        fail= fail->fail;
    }
  }

  my_free((gptr)tmp_nodes, MYF(0));
  DBUG_RETURN(FALSE);
}


/*
  SYNOPSIS
    void ac_trie_init (TRIE *trie, AC_TRIE_STATE *state);

      trie  - valid pointer to `TRIE'
      state - valid pointer to `AC_TRIE_STATE'

  DESCRIPTION
    Initializes `AC_TRIE_STATE' object: binds it to `trie' and places
    it at the root node.
*/

void ac_trie_init (TRIE *trie, AC_TRIE_STATE *state)
{
  DBUG_ENTER("ac_trie_init");
  DBUG_ASSERT(trie && state);
  state->trie= trie;
  state->node= &trie->root;
  DBUG_VOID_RETURN;
}