merge -r10340 from trunk feature:2248 Improve quick/query search results
git-svn-id: http://piwigo.org/svn/branches/2.2@10427 68402e56-0260-453c-a942-63ccdbb3a9ee
This commit is contained in:
parent
bc1cf1e420
commit
3594e552d7
1 changed files with 187 additions and 59 deletions
|
@ -265,17 +265,34 @@ SELECT DISTINCT(id)
|
||||||
return $items;
|
return $items;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// transliterate() is declared exactly once; which lower-casing function it
// uses depends on whether mb_strtolower() is available when this file loads.
if (!function_exists('mb_strtolower'))
{
  /**
   * Lower-cases $term with strtolower() and passes the result through
   * remove_accents() (defined elsewhere).
   *
   * @param string $term
   * @return mixed whatever remove_accents() returns
   */
  function transliterate($term)
  {
    $lowered = strtolower($term);
    return remove_accents($lowered);
  }
}
else
{
  /**
   * Lower-cases $term with mb_strtolower() and passes the result through
   * remove_accents() (defined elsewhere).
   *
   * @param string $term
   * @return mixed whatever remove_accents() returns
   */
  function transliterate($term)
  {
    $lowered = mb_strtolower($term);
    return remove_accents($lowered);
  }
}
|
||||||
|
|
||||||
|
/**
 * Tells whether $ch is treated as a word character: an ASCII digit, an
 * ASCII letter (either case), or a value whose first byte is above 127.
 *
 * @param string $ch character to classify
 * @return bool
 */
function is_word_char($ch)
{
  if ($ch >= '0' && $ch <= '9')
  {
    return true;
  }
  if ($ch >= 'a' && $ch <= 'z')
  {
    return true;
  }
  if ($ch >= 'A' && $ch <= 'Z')
  {
    return true;
  }
  // anything that is not plain ASCII is accepted as a word character
  return ord($ch) > 127;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* returns the LIKE sql clause corresponding to the quick search query $q
|
* analyzes and splits the quick/query search query $q into tokens
|
||||||
* and the field $field. example q='john bill', field='file' will return
|
* q='john bill' => 2 tokens 'john' 'bill'
|
||||||
* file LIKE '%john%' OR file LIKE '%bill%'. Special characters for MySql full
|
* Special characters for MySql full text search (+,<,>,~) appear in the token modifiers.
|
||||||
* text search (+,<,>,~) are omitted. The query can contain a phrase:
|
* The query can contain a phrase: 'Pierre "New York"' will return 'pierre' and 'new york'.
|
||||||
* 'Pierre "New York"' will return LIKE '%Pierre%' OR LIKE '%New York%'.
|
|
||||||
* @param string q
|
|
||||||
* @param string field
|
|
||||||
* @return string
|
|
||||||
*/
|
*/
|
||||||
function get_qsearch_like_clause($q, $field, $before='%', $after='%')
|
function analyse_qsearch($q, &$qtokens, &$qtoken_modifiers)
|
||||||
{
|
{
|
||||||
$q = stripslashes($q);
|
$q = stripslashes($q);
|
||||||
$tokens = array();
|
$tokens = array();
|
||||||
|
@ -292,27 +309,27 @@ function get_qsearch_like_clause($q, $field, $before='%', $after='%')
|
||||||
case 0:
|
case 0:
|
||||||
if ($ch=='"')
|
if ($ch=='"')
|
||||||
{
|
{
|
||||||
if (strlen($crt_token))
|
$tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier;
|
||||||
{
|
$crt_token = ""; $crt_token_modifier = "q";
|
||||||
$tokens[] = $crt_token;
|
|
||||||
$token_modifiers[] = $crt_token_modifier;
|
|
||||||
$crt_token = "";
|
|
||||||
$crt_token_modifier = "";
|
|
||||||
}
|
|
||||||
$state=1;
|
$state=1;
|
||||||
}
|
}
|
||||||
elseif ( $ch=='*' )
|
elseif ( $ch=='*' )
|
||||||
{ // wild card
|
{ // wild card
|
||||||
$crt_token .= '%';
|
if (strlen($crt_token))
|
||||||
|
{
|
||||||
|
$crt_token .= $ch;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$crt_token_modifier .= '*';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
elseif ( strcspn($ch, '+-><~')==0 )
|
elseif ( strcspn($ch, '+-><~')==0 )
|
||||||
{ //special full text modifier
|
{ //special full text modifier
|
||||||
if (strlen($crt_token))
|
if (strlen($crt_token))
|
||||||
{
|
{
|
||||||
$tokens[] = $crt_token;
|
$tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier;
|
||||||
$token_modifiers[] = $crt_token_modifier;
|
$crt_token = ""; $crt_token_modifier = "";
|
||||||
$crt_token = "";
|
|
||||||
$crt_token_modifier = "";
|
|
||||||
}
|
}
|
||||||
$crt_token_modifier .= $ch;
|
$crt_token_modifier .= $ch;
|
||||||
}
|
}
|
||||||
|
@ -320,18 +337,12 @@ function get_qsearch_like_clause($q, $field, $before='%', $after='%')
|
||||||
{ // white space
|
{ // white space
|
||||||
if (strlen($crt_token))
|
if (strlen($crt_token))
|
||||||
{
|
{
|
||||||
$tokens[] = $crt_token;
|
$tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier;
|
||||||
$token_modifiers[] = $crt_token_modifier;
|
$crt_token = ""; $crt_token_modifier = "";
|
||||||
$crt_token = "";
|
|
||||||
$crt_token_modifier = "";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if ( strcspn($ch, '%_')==0)
|
|
||||||
{// escape LIKE specials %_
|
|
||||||
$ch = '\\'.$ch;
|
|
||||||
}
|
|
||||||
$crt_token .= $ch;
|
$crt_token .= $ch;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -339,17 +350,11 @@ function get_qsearch_like_clause($q, $field, $before='%', $after='%')
|
||||||
switch ($ch)
|
switch ($ch)
|
||||||
{
|
{
|
||||||
case '"':
|
case '"':
|
||||||
$tokens[] = $crt_token;
|
$tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier;
|
||||||
$token_modifiers[] = $crt_token_modifier;
|
$crt_token = ""; $crt_token_modifier = "";
|
||||||
$crt_token = "";
|
|
||||||
$crt_token_modifier = "";
|
|
||||||
$state=0;
|
$state=0;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
if ( strcspn($ch, '%_')==0)
|
|
||||||
{// escape LIKE specials %_
|
|
||||||
$ch = '\\'.$ch;
|
|
||||||
}
|
|
||||||
$crt_token .= $ch;
|
$crt_token .= $ch;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -361,21 +366,49 @@ function get_qsearch_like_clause($q, $field, $before='%', $after='%')
|
||||||
$token_modifiers[] = $crt_token_modifier;
|
$token_modifiers[] = $crt_token_modifier;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$qtokens = array();
|
||||||
|
$qtoken_modifiers = array();
|
||||||
|
for ($i=0; $i<count($tokens); $i++)
|
||||||
|
{
|
||||||
|
if (strstr($token_modifiers[$i], 'q')===false)
|
||||||
|
{
|
||||||
|
if ( substr($tokens[$i], -1)=='*' )
|
||||||
|
{
|
||||||
|
$tokens[$i] = rtrim($tokens[$i], '*');
|
||||||
|
$token_modifiers[$i] .= '*';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( strlen($tokens[$i])==0)
|
||||||
|
continue;
|
||||||
|
$qtokens[] = $tokens[$i];
|
||||||
|
$qtoken_modifiers[] = $token_modifiers[$i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Returns the LIKE sql clause corresponding to a quick search query that
 * has already been split into tokens.
 *
 * Example: tokens ('john', 'bill') with field 'file' give
 *   (file LIKE '%john%' OR file LIKE '%bill%')
 *
 * @param array $tokens search tokens
 * @param array $token_modifiers modifiers, parallel to $tokens; a token whose
 *   modifier contains '-' is left out of the clause
 * @param string $field SQL expression placed on the left of LIKE; it is
 *   inserted verbatim, so the caller is responsible for it
 * @return string|null the OR-joined clauses wrapped in parentheses, or null
 *   when no token produced a clause
 */
function get_qsearch_like_clause($tokens, $token_modifiers, $field)
{
  $clauses = array();
  foreach ($tokens as $i => $raw_token)
  {
    $modifier = isset($token_modifiers[$i]) ? $token_modifiers[$i] : '';
    if (strpos($modifier, '-') !== false)
    { // NOT token: it must not widen the OR list
      continue;
    }

    $token = trim($raw_token, '%');
    // The length test must be on the token itself. The previous
    // strlen($token==0) measured a boolean, so depending on the PHP version
    // it skipped every textual token or let empty ones through as LIKE '%%'.
    if (strlen($token) == 0)
    {
      continue;
    }

    // addslashes() runs first so the backslashes added for % and _ below are
    // not themselves doubled
    $token = addslashes($token);
    $token = str_replace( array('%','_'), array('\\%','\\_'), $token); // escape LIKE specials %_
    $clauses[] = $field.' LIKE \'%'.$token.'%\'';
  }

  return count($clauses) ? '('.implode(' OR ', $clauses).')' : null;
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* returns the search results corresponding to a quick/query search.
|
* returns the search results corresponding to a quick/query search.
|
||||||
* A quick/query search returns many items (search is not strict), but results
|
* A quick/query search returns many items (search is not strict), but results
|
||||||
|
@ -395,6 +428,8 @@ function get_qsearch_like_clause($q, $field, $before='%', $after='%')
|
||||||
*/
|
*/
|
||||||
function get_quick_search_results($q, $super_order_by, $images_where='')
|
function get_quick_search_results($q, $super_order_by, $images_where='')
|
||||||
{
|
{
|
||||||
|
global $user, $conf;
|
||||||
|
|
||||||
$search_results =
|
$search_results =
|
||||||
array(
|
array(
|
||||||
'items' => array(),
|
'items' => array(),
|
||||||
|
@ -405,9 +440,11 @@ function get_quick_search_results($q, $super_order_by, $images_where='')
|
||||||
{
|
{
|
||||||
return $search_results;
|
return $search_results;
|
||||||
}
|
}
|
||||||
$q_like_field = '@@__db_field__@@'; //something never in a search
|
|
||||||
$q_like_clause = get_qsearch_like_clause($q, $q_like_field );
|
|
||||||
|
|
||||||
|
analyse_qsearch($q, $tokens, $token_modifiers);
|
||||||
|
|
||||||
|
$q_like_field = '@@__db_field__@@'; //something never in a search
|
||||||
|
$q_like_clause = get_qsearch_like_clause($tokens, $token_modifiers, $q_like_field );
|
||||||
|
|
||||||
// Step 1 - first we find matches in #images table ===========================
|
// Step 1 - first we find matches in #images table ===========================
|
||||||
$where_clauses='MATCH(i.name, i.comment) AGAINST( \''.$q.'\' IN BOOLEAN MODE)';
|
$where_clauses='MATCH(i.name, i.comment) AGAINST( \''.$q.'\' IN BOOLEAN MODE)';
|
||||||
|
@ -448,34 +485,126 @@ SELECT i.id,
|
||||||
|
|
||||||
|
|
||||||
// Step 2 - search tags corresponding to the query $q ========================
|
// Step 2 - search tags corresponding to the query $q ========================
|
||||||
if (!empty($q_like_clause))
|
$transliterated_tokens = array();
|
||||||
{ // search name and url name (without accents)
|
$token_tags = array();
|
||||||
$query = '
|
foreach ($tokens as $token)
|
||||||
SELECT id, name, url_name
|
{
|
||||||
|
$transliterated_tokens[] = transliterate($token);
|
||||||
|
$token_tags[] = array();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 2.1 - find match tags for every token in the query search
|
||||||
|
$all_tags = array();
|
||||||
|
$query = '
|
||||||
|
SELECT id, name, url_name, COUNT(image_id) AS nb_images
|
||||||
FROM '.TAGS_TABLE.'
|
FROM '.TAGS_TABLE.'
|
||||||
WHERE ('.str_replace($q_like_field, 'CONVERT(name, CHAR)', $q_like_clause).'
|
INNER JOIN '.IMAGE_TAG_TABLE.' ON id=tag_id
|
||||||
OR '.str_replace($q_like_field, 'url_name', $q_like_clause).')';
|
GROUP BY id';
|
||||||
$tags = hash_from_query($query, 'id');
|
$result = pwg_query($query);
|
||||||
if ( !empty($tags) )
|
while ($tag = pwg_db_fetch_assoc($result))
|
||||||
{ // we got some tags; get the images
|
{
|
||||||
$search_results['qs']['matching_tags']=$tags;
|
$transliterated_tag = transliterate($tag['name']);
|
||||||
|
|
||||||
|
// find how this tag matches query tokens
|
||||||
|
for ($i=0; $i<count($tokens); $i++)
|
||||||
|
{
|
||||||
|
if (strstr($token_modifiers[$i], '-')!==false)
|
||||||
|
continue;// ignore this NOT token
|
||||||
|
$transliterated_token = $transliterated_tokens[$i];
|
||||||
|
|
||||||
|
$match = false;
|
||||||
|
$pos = 0;
|
||||||
|
while ( ($pos = strpos($transliterated_tag, $transliterated_token, $pos)) !== false)
|
||||||
|
{
|
||||||
|
if (strstr($token_modifiers[$i], '*')!==false)
|
||||||
|
{// wildcard in this token
|
||||||
|
$match = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
$token_len = strlen($transliterated_token);
|
||||||
|
|
||||||
|
$word_begin = $pos;
|
||||||
|
while ($word_begin>0)
|
||||||
|
{
|
||||||
|
if (! is_word_char($transliterated_tag[$word_begin-1]) )
|
||||||
|
break;
|
||||||
|
$word_begin--;
|
||||||
|
}
|
||||||
|
|
||||||
|
$word_end = $pos + $token_len;
|
||||||
|
while ($word_end<strlen($transliterated_tag) && is_word_char($transliterated_tag[$word_end]) )
|
||||||
|
$word_end++;
|
||||||
|
|
||||||
|
$this_score = $token_len / ($word_end-$word_begin);
|
||||||
|
if ($token_len <= 2)
|
||||||
|
{// search for 1 or 2 characters must match exactly to avoid retrieving too much data
|
||||||
|
if ($token_len != $word_end-$word_begin)
|
||||||
|
$this_score = 0;
|
||||||
|
}
|
||||||
|
elseif ($token_len == 3)
|
||||||
|
{
|
||||||
|
if ($word_end-$word_begin > 4)
|
||||||
|
$this_score = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($this_score>0)
|
||||||
|
$match = max($match, $this_score );
|
||||||
|
$pos++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($match)
|
||||||
|
{
|
||||||
|
$tag_id = (int)$tag['id'];
|
||||||
|
$all_tags[$tag_id] = $tag;
|
||||||
|
$token_tags[$i][] = array('tag_id'=>$tag_id, 'score'=>$match);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$search_results['qs']['matching_tags']=$all_tags;
|
||||||
|
|
||||||
|
// Step 2.2 - reduce matching tags for every token in the query search
|
||||||
|
$score_cmp_fn = create_function('$a,$b', 'return 100*($b["score"]-$a["score"]);');
|
||||||
|
foreach ($token_tags as &$tt)
|
||||||
|
{
|
||||||
|
usort($tt, $score_cmp_fn);
|
||||||
|
$nb_images = 0;
|
||||||
|
$prev_score = 0;
|
||||||
|
for ($j=0; $j<count($tt); $j++)
|
||||||
|
{
|
||||||
|
if ($nb_images > 200 && $prev_score > $tt[$j]['score'] )
|
||||||
|
{// "many" images in previous tags and starting from this tag is less relevant
|
||||||
|
$tt = array_slice( $tt, 0, $j);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
$nb_images += $all_tags[ $tt[$j]['tag_id'] ]['nb_images'];
|
||||||
|
$prev_score = $tt[$j]['score'];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 2.3 - get the images for tags
|
||||||
|
for ($i=0; $i<count($token_tags); $i++)
|
||||||
|
{
|
||||||
|
$tag_ids = array();
|
||||||
|
foreach($token_tags[$i] as $arr)
|
||||||
|
$tag_ids[] = $arr['tag_id'];
|
||||||
|
|
||||||
|
if (!empty($tag_ids))
|
||||||
|
{
|
||||||
$query = '
|
$query = '
|
||||||
SELECT image_id, COUNT(tag_id) AS weight
|
SELECT image_id
|
||||||
FROM '.IMAGE_TAG_TABLE.'
|
FROM '.IMAGE_TAG_TABLE.'
|
||||||
WHERE tag_id IN ('.implode(',',array_keys($tags)).')
|
WHERE tag_id IN ('.implode(',',$tag_ids).')
|
||||||
GROUP BY image_id';
|
GROUP BY image_id';
|
||||||
$result = pwg_query($query);
|
$result = pwg_query($query);
|
||||||
while ($row = pwg_db_fetch_assoc($result))
|
while ($row = pwg_db_fetch_assoc($result))
|
||||||
{ // weight is important when sorting images by relevance
|
{ // weight is important when sorting images by relevance
|
||||||
$image_id=(int)$row['image_id'];
|
$image_id=(int)$row['image_id'];
|
||||||
@$by_weights[$image_id] += $row['weight'];
|
@$by_weights[$image_id] += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Step 3 - search categories corresponding to the query $q ==================
|
// Step 3 - search categories corresponding to the query $q ==================
|
||||||
global $user;
|
|
||||||
$query = '
|
$query = '
|
||||||
SELECT id, name, permalink, nb_images
|
SELECT id, name, permalink, nb_images
|
||||||
FROM '.CATEGORIES_TABLE.'
|
FROM '.CATEGORIES_TABLE.'
|
||||||
|
@ -531,7 +660,6 @@ SELECT id, name, permalink, nb_images
|
||||||
null,true
|
null,true
|
||||||
);
|
);
|
||||||
|
|
||||||
global $conf;
|
|
||||||
$query = '
|
$query = '
|
||||||
SELECT DISTINCT(id)
|
SELECT DISTINCT(id)
|
||||||
FROM '.IMAGES_TABLE.' i
|
FROM '.IMAGES_TABLE.' i
|
||||||
|
|
Loading…
Add table
Reference in a new issue