mariadb/mysys/mf_soundex.c

107 lines
3.4 KiB
C
Raw Normal View History

/* Copyright (C) 2000 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
2000-07-31 21:29:14 +02:00
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
2000-07-31 21:29:14 +02:00
/****************************************************************
*  SOUNDEX ALGORITHM in C                                       *
*                                                               *
*  The basic algorithm source is taken from EDN, Nov. 14, 1985, *
*  pg. 36.                                                      *
*                                                               *
*  As a test, those in Illinois will find that the first group  *
*  of numbers in their driver's license number is the soundex   *
*  number for their last name.                                  *
*                                                               *
*  RHW PC-IBBS ID. #1230                                        *
*                                                               *
*  As an extension, if remove_garbage is set then all non-alpha *
*  characters are skipped.                                      *
*                                                               *
*  Note that this implementation corresponds to the original    *
*  version of the algorithm, not to the more popular "enhanced" *
*  version described by Knuth.                                  *
****************************************************************/
#include "mysys_priv.h"
#include <m_ctype.h>
#include "my_static.h"
static char get_scode(CHARSET_INFO * cs, char **ptr,pbool remove_garbage);
2000-07-31 21:29:14 +02:00
/* outputed string is 4 byte long */
/* out_pntr can be == in_pntr */
void soundex(CHARSET_INFO * cs,register my_string out_pntr, my_string in_pntr,
2000-07-31 21:29:14 +02:00
pbool remove_garbage)
{
char ch,last_ch;
reg3 my_string end;
2002-08-15 16:42:54 +05:00
register uchar *map=cs->to_upper;
2000-07-31 21:29:14 +02:00
if (remove_garbage)
{
while (*in_pntr && !my_isalpha(cs,*in_pntr)) /* Skip pre-space */
2000-07-31 21:29:14 +02:00
in_pntr++;
}
2002-08-15 16:42:54 +05:00
*out_pntr++ = map[(uchar)*in_pntr]; /* Copy first letter */
last_ch = get_scode(cs,&in_pntr,0); /* code of the first letter */
2000-07-31 21:29:14 +02:00
/* for the first 'double-letter */
/* check. */
end=out_pntr+3; /* Loop on input letters until */
/* end of input (null) or output */
/* letter code count = 3 */
in_pntr++;
while (out_pntr < end && (ch = get_scode(cs,&in_pntr,remove_garbage)) != 0)
2000-07-31 21:29:14 +02:00
{
in_pntr++;
if ((ch != '0') && (ch != last_ch)) /* if not skipped or double */
{
*out_pntr++ = ch; /* letter, copy to output */
} /* for next double-letter check */
last_ch = ch; /* save code of last input letter */
}
while (out_pntr < end)
*out_pntr++ = '0';
*out_pntr=0; /* end string */
return;
} /* soundex */
/*
  Map the character at *ptr to its soundex code.

  SYNOPSIS
    get_scode()
    cs              Character set used for the alpha test and upper-casing.
    ptr             In/out: pointer to the current input position.  It is
                    only advanced when remove_garbage is set, and then only
                    past non-alpha characters; the character that is
                    finally examined is not consumed.
    remove_garbage  If set, skip non-alpha characters before mapping.

  RETURN
    soundex_map[] entry   upper-cased character is in 'A'..'Z'
    '0'                   other character that my_isalpha() accepts
                          (extended, country-specific letter; treated
                          like a vowel)
    0                     character cannot be mapped (including the
                          terminating NUL)
*/

static char get_scode(CHARSET_INFO * cs,char **ptr, pbool remove_garbage)
{
  uchar upper;

  if (remove_garbage)
  {
    /* Advance to the next alpha character or to the end of the string */
    for ( ; **ptr ; (*ptr)++)
    {
      if (my_isalpha(cs,**ptr))
        break;
    }
  }

  upper=my_toupper(cs,**ptr);
  if (upper >= 'A' && upper <= 'Z')
    return(soundex_map[upper-'A']);

  /* Outside A-Z: extended letters count as vowels, anything else ends */
  if (my_isalpha(cs,upper))
    return '0';
  return 0;
} /* get_scode */