bug 3056: quick search - now tag search is the same as image search (full text match or like operator)
git-svn-id: http://piwigo.org/svn/trunk@28144 68402e56-0260-453c-a942-63ccdbb3a9ee
This commit is contained in:
parent
2c3eb75ee9
commit
3ceca0d755
1 changed files with 89 additions and 208 deletions
|
@ -255,38 +255,6 @@ SELECT DISTINCT(id)
|
|||
return $items;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds if a char is a letter, a figure or any char of the extended ASCII table (>127).
|
||||
*
|
||||
* @param char $ch
|
||||
* @return bool
|
||||
*/
|
||||
function is_word_char($ch)
|
||||
{
|
||||
return ($ch>='0' && $ch<='9') || ($ch>='a' && $ch<='z') || ($ch>='A' && $ch<='Z') || ord($ch)>127;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds if a char is a special token for word start: [{<=*+
|
||||
*
|
||||
* @param char $ch
|
||||
* @return bool
|
||||
*/
|
||||
function is_odd_wbreak_begin($ch)
|
||||
{
|
||||
return strpos('[{<=*+', $ch)===false ? false:true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds if a char is a special token for word end: ]}>=*+
|
||||
*
|
||||
* @param char $ch
|
||||
* @return bool
|
||||
*/
|
||||
function is_odd_wbreak_end($ch)
|
||||
{
|
||||
return strpos(']}>=*+', $ch)===false ? false:true;
|
||||
}
|
||||
|
||||
|
||||
define('QST_QUOTED', 0x01);
|
||||
|
@ -295,7 +263,7 @@ define('QST_OR', 0x04);
|
|||
define('QST_WILDCARD_BEGIN', 0x08);
|
||||
define('QST_WILDCARD_END', 0x10);
|
||||
define('QST_WILDCARD', QST_WILDCARD_BEGIN|QST_WILDCARD_END);
|
||||
|
||||
define('QST_BREAK', 0x20);
|
||||
|
||||
class QSearchScope
|
||||
{
|
||||
|
@ -552,6 +520,8 @@ class QMultiToken
|
|||
{
|
||||
if (strlen($token) || (isset($scope) && $scope->nullable))
|
||||
{
|
||||
if (isset($scope))
|
||||
$modifier |= QST_BREAK;
|
||||
$this->tokens[] = new QSingleToken($token, $modifier, $scope);
|
||||
}
|
||||
$token = "";
|
||||
|
@ -713,9 +683,18 @@ class QMultiToken
|
|||
if ($remove)
|
||||
{
|
||||
array_splice($this->tokens, $i, 1);
|
||||
if ($i<count($this->tokens) && $this->tokens[$i]->is_single)
|
||||
{
|
||||
$this->tokens[$i]->modifier |= QST_BREAK;
|
||||
}
|
||||
$i--;
|
||||
}
|
||||
}
|
||||
|
||||
if ($level>0 && count($this->tokens) && $this->tokens[0]->is_single)
|
||||
{
|
||||
$this->tokens[0]->modifier |= QST_BREAK;
|
||||
}
|
||||
}
|
||||
|
||||
private function apply_scope(QSearchScope $scope)
|
||||
|
@ -836,6 +815,39 @@ class QResults
|
|||
var $iids;
|
||||
}
|
||||
|
||||
function qsearch_get_text_token_search_sql($token, $fields)
|
||||
{
|
||||
$clauses = array();
|
||||
$variants = array_merge(array($token->term), $token->variants);
|
||||
$fts = array();
|
||||
foreach ($variants as $variant)
|
||||
{
|
||||
if (mb_strlen($variant)<=3
|
||||
|| strcspn($variant, '!"#$%&()*+,./:;<=>?@[\]^`{|}~') < 3)
|
||||
{// odd term or too short for full text search; fallback to regex but unfortunately this is diacritic/accent sensitive
|
||||
$pre = ($token->modifier & QST_WILDCARD_BEGIN) ? '' : '[[:<:]]';
|
||||
$post = ($token->modifier & QST_WILDCARD_END) ? '' : '[[:>:]]';
|
||||
foreach( $fields as $field)
|
||||
$clauses[] = $field.' REGEXP \''.$pre.addslashes(preg_quote($variant)).$post.'\'';
|
||||
}
|
||||
else
|
||||
{
|
||||
$ft = $variant;
|
||||
if ($token->modifier & QST_QUOTED)
|
||||
$ft = '"'.$ft.'"';
|
||||
if ($token->modifier & QST_WILDCARD_END)
|
||||
$ft .= '*';
|
||||
$fts[] = $ft;
|
||||
}
|
||||
}
|
||||
|
||||
if (count($fts))
|
||||
{
|
||||
$clauses[] = 'MATCH('.implode(', ',$fields).') AGAINST( \''.addslashes(implode(' ',$fts)).'\' IN BOOLEAN MODE)';
|
||||
}
|
||||
return $clauses;
|
||||
}
|
||||
|
||||
function qsearch_get_images(QExpression $expr, QResults $qsr)
|
||||
{
|
||||
$qsr->images_iids = array_fill(0, count($expr->stokens), array());
|
||||
|
@ -856,34 +868,7 @@ function qsearch_get_images(QExpression $expr, QResults $qsr)
|
|||
{
|
||||
case 'photo':
|
||||
$clauses[] = $file_like;
|
||||
|
||||
$variants = array_merge(array($token->term), $token->variants);
|
||||
$fts = array();
|
||||
foreach ($variants as $variant)
|
||||
{
|
||||
if (mb_strlen($variant)<=3
|
||||
|| strcspn($variant, '!"#$%&()*+,./:;<=>?@[\]^`{|}~') < 3)
|
||||
{// odd term or too short for full text search; fallback to regex but unfortunately this is diacritic/accent sensitive
|
||||
$pre = ($token->modifier & QST_WILDCARD_BEGIN) ? '' : '[[:<:]]';
|
||||
$post = ($token->modifier & QST_WILDCARD_END) ? '' : '[[:>:]]';
|
||||
foreach( array('i.name', 'i.comment') as $field)
|
||||
$clauses[] = $field.' REGEXP \''.$pre.addslashes(preg_quote($variant)).$post.'\'';
|
||||
}
|
||||
else
|
||||
{
|
||||
$ft = $variant;
|
||||
if ($expr->stoken_modifiers[$i] & QST_QUOTED)
|
||||
$ft = '"'.$ft.'"';
|
||||
if ($expr->stoken_modifiers[$i] & QST_WILDCARD_END)
|
||||
$ft .= '*';
|
||||
$fts[] = $ft;
|
||||
}
|
||||
}
|
||||
|
||||
if (count($fts))
|
||||
{
|
||||
$clauses[] = 'MATCH(i.name, i.comment) AGAINST( \''.addslashes(implode(' ',$fts)).'\' IN BOOLEAN MODE)';
|
||||
}
|
||||
$clauses = array_merge($clauses, qsearch_get_text_token_search_sql($token, array('name','comment')));
|
||||
break;
|
||||
|
||||
case 'file':
|
||||
|
@ -929,168 +914,49 @@ function qsearch_get_images(QExpression $expr, QResults $qsr)
|
|||
|
||||
function qsearch_get_tags(QExpression $expr, QResults $qsr)
|
||||
{
|
||||
$tokens = $expr->stokens;
|
||||
$token_modifiers = $expr->stoken_modifiers;
|
||||
|
||||
$token_tag_ids = array_fill(0, count($tokens), array() );
|
||||
$token_tag_ids = $qsr->tag_iids = array_fill(0, count($expr->stokens), array() );
|
||||
$all_tags = array();
|
||||
|
||||
$token_tag_scores = $token_tag_ids;
|
||||
$transliterated_tokens = array();
|
||||
foreach ($tokens as $token)
|
||||
for ($i=0; $i<count($expr->stokens); $i++)
|
||||
{
|
||||
if (!isset($token->scope) || 'tag' == $token->scope->id)
|
||||
$token = $expr->stokens[$i];
|
||||
if (isset($token->scope) && 'tag' != $token->scope->id)
|
||||
continue;
|
||||
if (empty($token->term))
|
||||
continue;
|
||||
|
||||
$clauses = qsearch_get_text_token_search_sql( $token, array('name'));
|
||||
$query = 'SELECT * FROM '.TAGS_TABLE.'
|
||||
WHERE ('. implode("\n OR ",$clauses) .')';
|
||||
$result = pwg_query($query);
|
||||
while ($tag = pwg_db_fetch_assoc($result))
|
||||
{
|
||||
$transliterated_tokens[] = transliterate($token->term);
|
||||
}
|
||||
else
|
||||
{
|
||||
$transliterated_tokens[] = '';
|
||||
$token_tag_ids[$i][] = $tag['id'];
|
||||
$all_tags[$tag['id']] = $tag;
|
||||
}
|
||||
}
|
||||
|
||||
$query = '
|
||||
SELECT t.*, COUNT(image_id) AS counter
|
||||
FROM '.TAGS_TABLE.' t
|
||||
INNER JOIN '.IMAGE_TAG_TABLE.' ON id=tag_id
|
||||
GROUP BY id';
|
||||
$result = pwg_query($query);
|
||||
while ($tag = pwg_db_fetch_assoc($result))
|
||||
// check adjacent short words
|
||||
for ($i=0; $i<count($expr->stokens)-1; $i++)
|
||||
{
|
||||
$transliterated_tag = transliterate($tag['name']);
|
||||
|
||||
// find how this tag matches query tokens
|
||||
for ($i=0; $i<count($tokens); $i++)
|
||||
if ( (strlen($expr->stokens[$i])<=3 || strlen($expr->stokens[$i+1])<=3)
|
||||
&& (($expr->stoken_modifiers[$i] & (QST_QUOTED|QST_WILDCARD)) == 0)
|
||||
&& (($expr->stoken_modifiers[$i+1] & (QST_BREAK|QST_QUOTED|QST_WILDCARD)) == 0) )
|
||||
{
|
||||
$transliterated_token = $transliterated_tokens[$i];
|
||||
if (strlen($transliterated_token)==0)
|
||||
continue;
|
||||
|
||||
$match = false;
|
||||
$pos = 0;
|
||||
while ( ($pos = strpos($transliterated_tag, $transliterated_token, $pos)) !== false)
|
||||
$common = array_intersect( $token_tag_ids[$i], $token_tag_ids[$i+1] );
|
||||
if (count($common))
|
||||
{
|
||||
if ( ($token_modifiers[$i]&QST_WILDCARD)==QST_WILDCARD )
|
||||
{// wildcard in this token
|
||||
$match = 1;
|
||||
break;
|
||||
}
|
||||
$token_len = strlen($transliterated_token);
|
||||
|
||||
// search begin of word
|
||||
$wbegin_len=0; $wbegin_char=' ';
|
||||
while ($pos-$wbegin_len > 0)
|
||||
{
|
||||
if (! is_word_char($transliterated_tag[$pos-$wbegin_len-1]) )
|
||||
{
|
||||
$wbegin_char = $transliterated_tag[$pos-$wbegin_len-1];
|
||||
break;
|
||||
}
|
||||
$wbegin_len++;
|
||||
}
|
||||
|
||||
// search end of word
|
||||
$wend_len=0; $wend_char=' ';
|
||||
while ($pos+$token_len+$wend_len < strlen($transliterated_tag))
|
||||
{
|
||||
if (! is_word_char($transliterated_tag[$pos+$token_len+$wend_len]) )
|
||||
{
|
||||
$wend_char = $transliterated_tag[$pos+$token_len+$wend_len];
|
||||
break;
|
||||
}
|
||||
$wend_len++;
|
||||
}
|
||||
|
||||
$this_score = 0;
|
||||
if ( ($token_modifiers[$i]&QST_WILDCARD)==0 )
|
||||
{// no wildcard begin or end
|
||||
if ($token_len <= 2)
|
||||
{// search for 1 or 2 characters must match exactly to avoid retrieving too much data
|
||||
if ($wbegin_len==0 && $wend_len==0 && !is_odd_wbreak_begin($wbegin_char) && !is_odd_wbreak_end($wend_char) )
|
||||
$this_score = 1;
|
||||
}
|
||||
elseif ($token_len == 3)
|
||||
{
|
||||
if ($wbegin_len==0)
|
||||
$this_score = $token_len / ($token_len + $wend_len);
|
||||
}
|
||||
else
|
||||
{
|
||||
$this_score = $token_len / ($token_len + 1.1 * $wbegin_len + 0.9 * $wend_len);
|
||||
}
|
||||
}
|
||||
|
||||
if ($this_score>0)
|
||||
$match = max($match, $this_score );
|
||||
$pos++;
|
||||
}
|
||||
|
||||
if ($match)
|
||||
{
|
||||
$tag_id = (int)$tag['id'];
|
||||
$all_tags[$tag_id] = $tag;
|
||||
$token_tag_ids[$i][] = $tag_id;
|
||||
$token_tag_scores[$i][] = $match;
|
||||
$token_tag_ids[$i] = $token_tag_ids[$i+1] = $common;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// process tags
|
||||
$not_tag_ids = array();
|
||||
for ($i=0; $i<count($tokens); $i++)
|
||||
{
|
||||
array_multisort($token_tag_scores[$i], SORT_DESC|SORT_NUMERIC, $token_tag_ids[$i]);
|
||||
$is_not = $token_modifiers[$i]&QST_NOT;
|
||||
$counter = 0;
|
||||
|
||||
for ($j=0; $j<count($token_tag_scores[$i]); $j++)
|
||||
{
|
||||
if ($is_not)
|
||||
{
|
||||
if ($token_tag_scores[$i][$j] < 0.8 ||
|
||||
($j>0 && $token_tag_scores[$i][$j] < $token_tag_scores[$i][0]) )
|
||||
{
|
||||
array_splice($token_tag_scores[$i], $j);
|
||||
array_splice($token_tag_ids[$i], $j);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
$tag_id = $token_tag_ids[$i][$j];
|
||||
$counter += $all_tags[$tag_id]['counter'];
|
||||
if ( $j>0 && (
|
||||
($counter > 100 && $token_tag_scores[$i][0] > $token_tag_scores[$i][$j]) // "many" images in previous tags and starting from this tag is less relevant
|
||||
|| ($token_tag_scores[$i][0]==1 && $token_tag_scores[$i][$j]<0.8)
|
||||
|| ($token_tag_scores[$i][0]>0.8 && $token_tag_scores[$i][$j]<0.5)
|
||||
))
|
||||
{// we remove this tag from the results, but we still leave it in all_tags list so that if we are wrong, the user chooses it
|
||||
array_splice($token_tag_ids[$i], $j);
|
||||
array_splice($token_tag_scores[$i], $j);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ($is_not)
|
||||
{
|
||||
$not_tag_ids = array_merge($not_tag_ids, $token_tag_ids[$i]);
|
||||
}
|
||||
}
|
||||
|
||||
$all_tags = array_diff_key($all_tags, array_flip($not_tag_ids));
|
||||
usort($all_tags, 'tag_alpha_compare');
|
||||
foreach ( $all_tags as &$tag )
|
||||
{
|
||||
$tag['name'] = trigger_event('render_tag_name', $tag['name'], $tag);
|
||||
}
|
||||
$qsr->all_tags = $all_tags;
|
||||
|
||||
$qsr->tag_ids = $token_tag_ids;
|
||||
$qsr->tag_iids = array_fill(0, count($tokens), array() );
|
||||
|
||||
for ($i=0; $i<count($tokens); $i++)
|
||||
// get images
|
||||
$positive_ids = $not_ids = array();
|
||||
for ($i=0; $i<count($expr->stokens); $i++)
|
||||
{
|
||||
$tag_ids = $token_tag_ids[$i];
|
||||
$token = $expr->stokens[$i];
|
||||
|
||||
if (!empty($tag_ids))
|
||||
{
|
||||
|
@ -1099,8 +965,12 @@ SELECT image_id FROM '.IMAGE_TAG_TABLE.'
|
|||
WHERE tag_id IN ('.implode(',',$tag_ids).')
|
||||
GROUP BY image_id';
|
||||
$qsr->tag_iids[$i] = query2array($query, null, 'image_id');
|
||||
if ($expr->stoken_modifiers[$i]&QST_NOT)
|
||||
$not_ids = array_merge($not_ids, $tag_ids);
|
||||
else
|
||||
$positive_ids = array_merge($positive_ids, $tag_ids);
|
||||
}
|
||||
elseif (isset($tokens[$i]->scope) && 'tag' == $tokens[$i]->scope->id && strlen($token->term)==0)
|
||||
elseif (isset($token->scope) && 'tag' == $token->scope->id && strlen($token->term)==0)
|
||||
{
|
||||
if ($tokens[$i]->modifier & QST_WILDCARD)
|
||||
{// eg. 'tag:*' returns all tagged images
|
||||
|
@ -1112,9 +982,19 @@ SELECT image_id FROM '.IMAGE_TAG_TABLE.'
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
$all_tags = array_intersect_key($all_tags, array_flip( array_diff($positive_ids, $not_ids) ) );
|
||||
usort($all_tags, 'tag_alpha_compare');
|
||||
foreach ( $all_tags as &$tag )
|
||||
{
|
||||
$tag['name'] = trigger_event('render_tag_name', $tag['name'], $tag);
|
||||
}
|
||||
$qsr->all_tags = $all_tags;
|
||||
$qsr->tag_ids = $token_tag_ids;
|
||||
}
|
||||
|
||||
|
||||
|
||||
function qsearch_eval(QMultiToken $expr, QResults $qsr, &$qualifies, &$ignored_terms)
|
||||
{
|
||||
$qualifies = false; // until we find at least one positive term
|
||||
|
@ -1259,6 +1139,7 @@ function get_quick_search_results($q, $options)
|
|||
for ($i=0; $i<count($expression->stokens); $i++)
|
||||
{
|
||||
$debug[] = $expression->stokens[$i].': '.count($qsr->tag_ids[$i]).' tags, '.count($qsr->tag_iids[$i]).' tiids, '.count($qsr->images_iids[$i]).' iiids, '.count($qsr->iids[$i]).' iids'
|
||||
.' modifier:'.dechex($expression->stoken_modifiers[$i])
|
||||
.( !empty($expression->stokens[$i]->variants) ? ' variants: '.implode(', ',$expression->stokens[$i]->variants): '');
|
||||
}
|
||||
$debug[] = 'before perms '.count($ids);
|
||||
|
|
Loading…
Reference in a new issue