From dc17fc9ba94b9490ed79fc82b9c4d5bf7f2640af Mon Sep 17 00:00:00 2001 From: rvelices Date: Sat, 22 Mar 2014 13:03:45 +0000 Subject: bug 3056: quick search OR operator priority taken into account search for 'mary qwerty' will ignore 'qwerty' and return only results for 'mary' if there is no such thing as 'qwerty' in the photos (if there was 'mary' and 'qwerty', the results for both 'mary' AND 'qwerty' would be shown) git-svn-id: http://piwigo.org/svn/trunk@27882 68402e56-0260-453c-a942-63ccdbb3a9ee --- include/functions_search.inc.php | 133 +++++++++++++++++++++++++++++--------- index.php | 9 +++ language/en_UK/common.lang.php | 1 + themes/default/template/index.tpl | 19 ++++-- themes/default/theme.css | 2 +- 5 files changed, 128 insertions(+), 36 deletions(-) diff --git a/include/functions_search.inc.php b/include/functions_search.inc.php index c67cfdf0b..4cda7f4ef 100644 --- a/include/functions_search.inc.php +++ b/include/functions_search.inc.php @@ -305,10 +305,11 @@ define('QST_WILDCARD', QST_WILDCARD_BEGIN|QST_WILDCARD_END); * @param string $q */ +/** Represents a single word or quoted phrase to be searched.*/ class QSingleToken { var $is_single = true; - var $token; + var $token; /* the actual word/phrase string*/ var $idx; function __construct($token) @@ -317,11 +318,12 @@ class QSingleToken } } +/** Represents an expression of several words or sub expressions to be searched.*/ class QMultiToken { var $is_single = false; - var $tokens = array(); - var $token_modifiers = array(); + var $tokens = array(); // the actual array of QSingleToken or QMultiToken + var $token_modifiers = array(); // modifiers (OR,NOT,...) for every token function __toString() { @@ -358,7 +360,7 @@ class QMultiToken return $s; } - function push(&$token, &$modifier) + private function push(&$token, &$modifier) { $this->tokens[] = new QSingleToken($token); $this->token_modifiers[] = $modifier; @@ -366,6 +368,13 @@ class QMultiToken $modifier = 0; } + /** + * Parses the input query string by tokenizing the input, generating the modifiers (and/or/not/quotation/wildcards...). + * Recursivity occurs when parsing () + * @param string $q the actual query to be parsed + * @param int $qi the character index in $q where to start parsing + * @param int $level the depth from root in the tree (number of opened and unclosed opening brackets) + */ protected function parse_expression($q, &$qi, $level) { $crt_token = ""; @@ -486,6 +495,52 @@ class QMultiToken } } } + + private static function priority($modifier) + { + return $modifier & QST_OR ? 0 :1; + } + + /* because evaluations occur left to right, we ensure that 'a OR b c d' is interpreted as 'a OR (b c d)'*/ + protected function check_operator_priority() + { + for ($i=0; $itokens); $i++) + { + if (!$this->tokens[$i]->is_single) + $this->tokens[$i]->check_operator_priority(); + if ($i==1) + $crt_prio = self::priority($this->token_modifiers[$i]); + if ($i<=1) + continue; + $prio = self::priority($this->token_modifiers[$i]); + if ($prio > $crt_prio) + {// e.g. 'a OR b c d' i=2, operator(c)=AND -> prio(AND) > prio(OR) = operator(b) + $term_count = 2; // at least b and c to be regrouped + for ($j=$i+1; $jtokens); $j++) + { + if (self::priority($this->token_modifiers[$j]) >= $prio) + $term_count++; // also take d + else + break; + } + + $i--; // move pointer to b + // crate sub expression (b c d) + $sub = new QMultiToken; + $sub->tokens = array_splice($this->tokens, $i, $term_count); + $sub->token_modifiers = array_splice($this->token_modifiers, $i, $term_count); + + // rewrite ourseleves as a (b c d) + array_splice($this->tokens, $i, 0, array($sub)); + array_splice($this->token_modifiers, $i, 0, array($sub->token_modifiers[0]&QST_OR)); + $sub->token_modifiers[0] &= ~QST_OR; + + $sub->check_operator_priority(); + } + else + $crt_prio = $prio; + } + } } class QExpression extends QMultiToken @@ -497,28 +552,39 @@ class QExpression extends QMultiToken { $i = 0; $this->parse_expression($q, $i, 0); - //@TODO: manipulate the tree so that 'a OR b c' is the same as 'b c OR a' - $this->build_single_tokens($this); + //manipulate the tree so that 'a OR b c' is the same as 'b c OR a' + $this->check_operator_priority(); + $this->build_single_tokens($this, 0); } - private function build_single_tokens(QMultiToken $expr) + private function build_single_tokens(QMultiToken $expr, $this_is_not) { - //@TODO: double negation results in no negation in token modifier for ($i=0; $itokens); $i++) { $token = $expr->tokens[$i]; + $crt_is_not = ($expr->token_modifiers[$i] ^ $this_is_not) & QST_NOT; // no negation OR double negation -> no negation; + if ($token->is_single) { $token->idx = count($this->stokens); $this->stokens[] = $token->token; - $this->stoken_modifiers[] = $expr->token_modifiers[$i]; + + $modifier = $expr->token_modifiers[$i]; + if ($crt_is_not) + $modifier |= QST_NOT; + else + $modifier &= ~QST_NOT; + $this->stoken_modifiers[] = $modifier; } else - $this->build_single_tokens($token); + $this->build_single_tokens($token, $crt_is_not); } } } +/** + Structure of results being filled from different tables +*/ class QResults { var $all_tags; @@ -729,39 +795,43 @@ SELECT image_id FROM '.IMAGE_TAG_TABLE.' } -function qsearch_eval(QExpression $expr, QResults $qsr, QMultiToken $crt_expr) +function qsearch_eval(QMultiToken $expr, QResults $qsr, &$qualifies, &$ignored_terms) { + $qualifies = false; // until we find at least one positive term + $ignored_terms = array(); + $ids = $not_ids = array(); - $first = true; - for ($i=0; $itokens); $i++) + + for ($i=0; $itokens); $i++) { - $current = $crt_expr->tokens[$i]; - if ($current->is_single) + $crt = $expr->tokens[$i]; + if ($crt->is_single) { - $crt_ids = $qsr->iids[$current->idx] = array_unique( array_merge($qsr->images_iids[$current->idx], $qsr->tag_iids[$current->idx]) ); + $crt_ids = $qsr->iids[$crt->idx] = array_unique( array_merge($qsr->images_iids[$crt->idx], $qsr->tag_iids[$crt->idx]) ); + $crt_qualifies = count($crt_ids)>0 || count($qsr->tag_ids[$crt->idx])>0; + $crt_ignored_terms = $crt_qualifies ? array() : array($crt->token); } else - $crt_ids = qsearch_eval($expr, $qsr, $current); - $modifier = $crt_expr->token_modifiers[$i]; + $crt_ids = qsearch_eval($crt, $qsr, $crt_qualifies, $crt_ignored_terms); + $modifier = $expr->token_modifiers[$i]; if ($modifier & QST_NOT) $not_ids = array_unique( array_merge($not_ids, $crt_ids)); else { + $ignored_terms = array_merge($ignored_terms, $crt_ignored_terms); if ($modifier & QST_OR) + { $ids = array_unique( array_merge($ids, $crt_ids) ); - else + $qualifies |= $crt_qualifies; + } + elseif ($crt_qualifies) { - if ($current->is_single && empty($crt_ids)) - { - //@TODO: mark this term as unmatched and tell users - //@TODO: if we don't find a term at all, maybe ignore it and produce some results - } - if ($first) - $ids = $crt_ids; - else + if ($qualifies) $ids = array_intersect($ids, $crt_ids); - $first= false; + else + $ids = $crt_ids; + $qualifies = true; } } } @@ -778,6 +848,7 @@ function qsearch_eval(QExpression $expr, QResults $qsr, QMultiToken $crt_expr) * array ( * 'items' => array of matching images * 'qs' => array( + * 'unmatched_terms' => array of terms from the input string that were not matched * 'matching_tags' => array of matching tags * 'matching_cats' => array of matching categories * 'matching_cats_no_images' =>array(99) - matching categories without images @@ -791,8 +862,8 @@ function qsearch_eval(QExpression $expr, QResults $qsr, QMultiToken $crt_expr) */ function get_quick_search_results($q, $super_order_by, $images_where='') { - global $user, $conf; - + global $conf; + //@TODO: maybe cache for 10 minutes the result set to avoid many expensive sql calls when navigating the pictures $search_results = array( 'items' => array(), @@ -807,7 +878,7 @@ function get_quick_search_results($q, $super_order_by, $images_where='') qsearch_get_images($expression, $qsr); //var_export($qsr->all_tags); - $ids = qsearch_eval($expression, $qsr, $expression); + $ids = qsearch_eval($expression, $qsr, $tmp, $search_results['qs']['unmatched_terms']); $debug[] = "