From f46b85bc856b06996f347cb86b86a19e5d462391 Mon Sep 17 00:00:00 2001 From: rvelices Date: Sun, 23 Mar 2014 05:38:05 +0000 Subject: bug 3056: quick search - added inflectors for english and french languages - current quick search is kept in the quick search input box - small fixes git-svn-id: http://piwigo.org/svn/trunk@27884 68402e56-0260-453c-a942-63ccdbb3a9ee --- include/functions_search.inc.php | 50 ++++++++++-- include/inflectors/en.php | 136 +++++++++++++++++++++++++++++++ include/inflectors/fr.php | 96 ++++++++++++++++++++++ index.php | 8 +- themes/default/template/menubar_menu.tpl | 4 +- 5 files changed, 282 insertions(+), 12 deletions(-) create mode 100644 include/inflectors/en.php create mode 100644 include/inflectors/fr.php diff --git a/include/functions_search.inc.php b/include/functions_search.inc.php index 4cda7f4ef..5410dd9df 100644 --- a/include/functions_search.inc.php +++ b/include/functions_search.inc.php @@ -592,12 +592,24 @@ class QResults var $tag_iids; var $images_iids; var $iids; + + var $variants; } function qsearch_get_images(QExpression $expr, QResults $qsr) { //@TODO: inflections for english / french $qsr->images_iids = array_fill(0, count($expr->tokens), array()); + + $inflector = null; + $lang_code = substr(get_default_language(),0,2); + include_once(PHPWG_ROOT_PATH.'include/inflectors/'.$lang_code.'.php'); + $class_name = 'Inflector_'.$lang_code; + if (class_exists($class_name)) + { + $inflector = new $class_name; + } + $query_base = 'SELECT id from '.IMAGES_TABLE.' i WHERE '; for ($i=0; $istokens); $i++) { @@ -608,6 +620,19 @@ function qsearch_get_images(QExpression $expr, QResults $qsr) $like = str_replace( array('%','_'), array('\\%','\\_'), $like); // escape LIKE specials %_ $clauses[] = 'CONVERT(file, CHAR) LIKE \'%'.$like.'%\''; + if ($inflector!=null && strlen($token)>2 + && ($expr->stoken_modifiers[$i] & (QST_QUOTED|QST_WILDCARD))==0 + && strcspn($token, '\'0123456789') == strlen($token) + ) + { + $variants = array_unique( array_diff( $inflector->get_variants($token), array($token) ) ); + $qsr->variants[$token] = $variants; + } + else + { + $variants = array(); + } + if (strlen($token)>3) // default minimum full text index { $ft = $token; @@ -615,15 +640,20 @@ function qsearch_get_images(QExpression $expr, QResults $qsr) $ft = '"'.$ft.'"'; if ($expr->stoken_modifiers[$i] & QST_WILDCARD_END) $ft .= '*'; + foreach ($variants as $variant) + { + $ft.=' '.$variant; + } $clauses[] = 'MATCH(i.name, i.comment) AGAINST( \''.addslashes($ft).'\' IN BOOLEAN MODE)'; } else { foreach( array('i.name', 'i.comment') as $field) { - $clauses[] = $field.' LIKE \''.$like.' %\''; + /*$clauses[] = $field.' LIKE \''.$like.' %\''; $clauses[] = $field.' LIKE \'% '.$like.'\''; - $clauses[] = $field.' LIKE \'% '.$like.' %\''; + $clauses[] = $field.' LIKE \'% '.$like.' %\'';*/ + $clauses[] = $field.' REGEXP \'[[:<:]]'.addslashes(preg_quote($token)).'[[:>:]]\''; } } $query = $query_base.'('.implode(' OR ', $clauses).')'; @@ -753,8 +783,12 @@ SELECT t.*, COUNT(image_id) AS counter { $tag_id = $token_tag_ids[$i][$j]; $counter += $all_tags[$tag_id]['counter']; - if ($counter > 200 && $j>0 && $token_tag_scores[$i][0] > $token_tag_scores[$i][$j] ) - {// "many" images in previous tags and starting from this tag is less relevent + if ( $j>0 && ( + ($counter > 100 && $token_tag_scores[$i][0] > $token_tag_scores[$i][$j]) // "many" images in previous tags and starting from this tag is less relevant + || ($token_tag_scores[$i][0]==1 && $token_tag_scores[$i][$j]<0.8) + || ($token_tag_scores[$i][0]>0.8 && $token_tag_scores[$i][$j]<0.5) + )) + {// we remove this tag from the results, but we still leave it in all_tags list so that if we are wrong, the user chooses it array_splice($token_tag_ids[$i], $j); array_splice($token_tag_scores[$i], $j); break; @@ -864,12 +898,13 @@ function get_quick_search_results($q, $super_order_by, $images_where='') { global $conf; //@TODO: maybe cache for 10 minutes the result set to avoid many expensive sql calls when navigating the pictures + $q = trim(stripslashes($q)); $search_results = array( 'items' => array(), - 'qs' => array('q'=>stripslashes($q)), + 'qs' => array('q'=>$q), ); - $q = trim($q); + $expression = new QExpression($q); //var_export($expression); @@ -884,7 +919,8 @@ function get_quick_search_results($q, $super_order_by, $images_where='') $debug[] = count($expression->stokens).' tokens'; for ($i=0; $istokens); $i++) { - $debug[] = $expression->stokens[$i].': '.count($qsr->tag_ids[$i]).' tags, '.count($qsr->tag_iids[$i]).' tiids, '.count($qsr->images_iids[$i]).' iiids, '.count($qsr->iids[$i]).' iids'; + $debug[] = $expression->stokens[$i].': '.count($qsr->tag_ids[$i]).' tags, '.count($qsr->tag_iids[$i]).' tiids, '.count($qsr->images_iids[$i]).' iiids, '.count($qsr->iids[$i]).' iids' + .( !empty($qsr->variants[$expression->stokens[$i]]) ? ' variants: '.implode(', ',$qsr->variants[$expression->stokens[$i]]): ''); } $debug[] = 'before perms '.count($ids); diff --git a/include/inflectors/en.php b/include/inflectors/en.php new file mode 100644 index 000000000..c490ba1b7 --- /dev/null +++ b/include/inflectors/en.php @@ -0,0 +1,136 @@ + 'octopuses', + 'virus' => 'viruses', + 'person' => 'people', + 'man' => 'men', + 'woman' => 'women', + 'child' => 'children', + 'move' => 'moves', + 'mouse' => 'mice', + 'ox' => 'oxen', + 'zombie' => 'zombies', + ); + + $this->exceptions = $tmp; + foreach ($tmp as $k => $v) + $this->exceptions[$v] = $k; + + foreach ( explode(' ', 'new news advice art coal baggage butter clothing cotton currency deer energy equipment experience fish flour food furniture gas homework impatience information jeans knowledge leather love luggage money oil patience police polish progress research rice series sheep silk soap species sugar talent toothpaste travel vinegar weather wood wool work') + as $v) + { + $this->exceptions[$v] = 0; + } + + $this->pluralizers = array_reverse(array( '/$/' => 's', + '/s$/' => 's', + '/^(ax|test)is$/' => '\1es', + '/(alias|status)$/' => '\1es', + '/(bu)s$/' => '\1ses', + '/(buffal|tomat)o$/' => '\1oes', + '/([ti])um$/' => '\1a', + '/([ti])a$/' => '\1a', + '/sis$/' => 'ses', + '/(?:([^f])fe|([lr])f)$/' => '\1\2ves', + '/(hive)$/' => '\1s', + '/([^aeiouy]|qu)y$/' => '\1ies', + '/(x|ch|ss|sh)$/' => '\1es', + '/(matr|vert|ind)(?:ix|ex)$/' => '\1ices', + '/(quiz)$/' => '\1zes', + )); + + $this->singularizers = array_reverse(array( + '/s$/' => '', + '/(ss)$/' => '\1', + '/(n)ews$/' => '\1ews', + '/([ti])a$/' => '\1um', + '/((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)(sis|ses)$/' => '\1sis', + '/(^analy)(sis|ses)$/' => '\1sis', + '/([^f])ves$/' => '\1fe', + '/(hive)s$/' => '\1', + '/(tive)s$/' => '\1', + '/([lr])ves$/' => '\1f', + '/([^aeiouy]|qu)ies$/' => '\1y', + '/(s)eries$/' => '\1eries', + '/(m)ovies$/' => '\1ovie', + '/(x|ch|ss|sh)es$/' => '\1', + '/(bus)(es)?$/' => '\1', + '/(o)es$/' => '\1', + '/(shoe)s$/' => '\1', + '/(cris|test)(is|es)$/' => '\1is', + '/^(a)x[ie]s$/' => '\1xis', + '/(alias|status)(es)?$/' => '\1', + '/(vert|ind)ices$/' => '\1ex', + '/(matr)ices$/' => '\1ix', + '/(quiz)zes$/' => '\1', + '/(database)s$/' => '\1', + )); + } + + function get_variants($word) + { + $res = array(); + + $word = strtolower($word); + + $rc = @$this->exceptions[$word]; + if ( isset($rc) ) + { + if (!empty($rc)) + $res[] = $rc; + return $res; + } + + foreach ($this->pluralizers as $rule => $replacement) + { + $rc = preg_replace($rule, $replacement, $word, -1, $count); + if ($count) + { + $res[] = $rc; + break; + } + } + + foreach ($this->singularizers as $rule => $replacement) + { + $rc = preg_replace($rule, $replacement, $word, -1, $count); + if ($count) + { + $res[] = $rc; + break; + } + } + + return $res; + } +} +?> \ No newline at end of file diff --git a/include/inflectors/fr.php b/include/inflectors/fr.php new file mode 100644 index 000000000..f634d2a65 --- /dev/null +++ b/include/inflectors/fr.php @@ -0,0 +1,96 @@ + 'messieurs', + 'madame' => 'mesdames', + 'mademoiselle' => 'mesdemoiselles', + ); + + $this->exceptions = $tmp; + foreach ($tmp as $k => $v) + $this->exceptions[$v] = $k; + + $this->pluralizers = array_reverse(array( '/$/' => 's', + '/(bijou|caillou|chou|genou|hibou|joujou|pou|au|eu|eau)$/' => '\1x', + '/(bleu|émeu|landau|lieu|pneu|sarrau)$/' => '\1s', + '/al$/' => 'aux', + '/ail$/' => 'ails', + '/(b|cor|ém|gemm|soupir|trav|vant|vitr)ail$/' => '\1aux', + '/(s|x|z)$/' => '\1', + )); + + $this->singularizers = array_reverse(array( + '/s$/' => '', + '/(bijou|caillou|chou|genou|hibou|joujou|pou|au|eu|eau)x$/' => '\1', + '/(journ|chev)aux$/' => '\1al', + '/ails$/' => 'ail', + '/(b|cor|ém|gemm|soupir|trav|vant|vitr)aux$/' => '\1ail', + )); + } + + function get_variants($word) + { + $res = array(); + + $word = strtolower($word); + + $rc = @$this->exceptions[$word]; + if ( isset($rc) ) + { + if (!empty($rc)) + $res[] = $rc; + return $res; + } + + foreach ($this->pluralizers as $rule => $replacement) + { + $rc = preg_replace($rule, $replacement, $word, -1, $count); + if ($count) + { + $res[] = $rc; + break; + } + } + + foreach ($this->singularizers as $rule => $replacement) + { + $rc = preg_replace($rule, $replacement, $word, -1, $count); + if ($count) + { + $res[] = $rc; + break; + } + } + + return $res; + } +} +?> \ No newline at end of file diff --git a/index.php b/index.php index c6ce22f56..9e58d2123 100644 --- a/index.php +++ b/index.php @@ -85,6 +85,11 @@ if (count($page['items']) > $page['nb_image_page']) $template->assign('thumb_navbar', $page['navigation_bar'] ); +if ( $page['section']=='search' and isset($page['qsearch_details']) ) +{ + $template->assign('QUERY_SEARCH', htmlspecialchars($page['qsearch_details']['q']) ); +} + // caddie filling :-) if (isset($_GET['caddie'])) { @@ -218,9 +223,6 @@ if ( empty($page['is_external']) or !$page['is_external'] ) if ( $page['section']=='search' and $page['start']==0 and !isset($page['chronology_field']) and isset($page['qsearch_details']) ) { - $template->assign('QUERY_SEARCH', - htmlspecialchars($page['qsearch_details']['q']) ); - $cats = array_merge( (array)@$page['qsearch_details']['matching_cats_no_images'], (array)@$page['qsearch_details']['matching_cats'] ); diff --git a/themes/default/template/menubar_menu.tpl b/themes/default/template/menubar_menu.tpl index 2eb2dd3a9..f2c2c4635 100644 --- a/themes/default/template/menubar_menu.tpl +++ b/themes/default/template/menubar_menu.tpl @@ -3,10 +3,10 @@ {if isset($block->data.qsearch) and $block->data.qsearch==true}

is for html validation only - does not affect positioning*}> - +

- + {/if}
    {strip} {foreach from=$block->data item=link} -- cgit v1.2.3