diff options
author | rvelices <rv-github@modusoptimus.com> | 2007-10-12 03:27:34 +0000 |
---|---|---|
committer | rvelices <rv-github@modusoptimus.com> | 2007-10-12 03:27:34 +0000 |
commit | 4afa14cc8d3f26f841c92b083355004251d75087 (patch) | |
tree | 99606ff8bf43f5c30f50d191f3593a5ca6a43260 /include/functions_search.inc.php | |
parent | 36e0e6e8f1a455367f363048fc1825aebbd67eee (diff) |
- fix plugin menu link broken with xamp (realpath behaves differently)
- complete quick search rewriting
- now we can quote phrases as in google "New York" is not the same as New York
- user comments not searched anymore (faster)
- the big full text query does not use joins anymore (faster)
- related tags not shown on the index page, but now you can see the matching tags and matching categories
git-svn-id: http://piwigo.org/svn/trunk@2135 68402e56-0260-453c-a942-63ccdbb3a9ee
Diffstat (limited to 'include/functions_search.inc.php')
-rw-r--r-- | include/functions_search.inc.php | 361 |
1 files changed, 224 insertions, 137 deletions
diff --git a/include/functions_search.inc.php b/include/functions_search.inc.php index 0c2e61214..c34d31463 100644 --- a/include/functions_search.inc.php +++ b/include/functions_search.inc.php @@ -253,150 +253,194 @@ SELECT DISTINCT(id) /** * returns the LIKE sql clause corresponding to the quick search query $q - * and the field $field. example q="john bill", field="file" will return - * file LIKE "%john%" OR file LIKE "%bill%". Special characters for MySql - * full text search (+,<,>) are omitted. + * and the field $field. example q='john bill', field='file' will return + * file LIKE '%john%' OR file LIKE '%bill%'. Special characters for MySql full + * text search (+,<,>,~) are omitted. The query can contain a phrase: + * 'Pierre "New York"' will return LIKE '%Pierre%' OR LIKE '%New York%'. * @param string q * @param string field * @return string */ function get_qsearch_like_clause($q, $field) { - $tokens = preg_split('/[\s,.;!\?]+/', $q); - for ($i=0; $i<count($tokens); $i++) + $q = stripslashes($q); + $tokens = array(); + $token_modifiers = array(); + $crt_token = ""; + $crt_token_modifier = ""; + $state = 0; + + for ($i=0; $i<strlen($q); $i++) { - $tokens[$i]=str_replace('*','%', $tokens[$i]); - if (preg_match('/^[+<>]/',$tokens[$i]) ) - $tokens[$i]=substr($tokens[$i], 1); - else if (substr($tokens[$i], 0, 1)=='-') + $ch = $q[$i]; + switch ($state) { - unset($tokens[$i]); - $i--; + case 0: + if ($ch=='"') + { + if (strlen($crt_token)) + { + $tokens[] = $crt_token; + $token_modifiers[] = $crt_token_modifier; + $crt_token = ""; + $crt_token_modifier = ""; + } + $state=1; + } + elseif ( $ch=='*' ) + { // wild card + $crt_token .= '%'; + } + elseif ( strcspn($ch, '+-><~')==0 ) + { //special full text modifier + if (strlen($crt_token)) + { + $tokens[] = $crt_token; + $token_modifiers[] = $crt_token_modifier; + $crt_token = ""; + $crt_token_modifier = ""; + } + $crt_token_modifier .= $ch; + } + elseif (preg_match('/[\s,.;!\?]+/', $ch)) + { // white space + if (strlen($crt_token)) + { + $tokens[] = $crt_token; + $token_modifiers[] = $crt_token_modifier; + $crt_token = ""; + $crt_token_modifier = ""; + } + } + else + { + $crt_token .= $ch; + } + break; + case 1: // qualified with quotes + switch ($ch) + { + case '"': + $tokens[] = $crt_token; + $token_modifiers[] = $crt_token_modifier; + $crt_token = ""; + $crt_token_modifier = ""; + $state=0; + break; + default: + $crt_token .= $ch; + } + break; } } + if (strlen($crt_token)) + { + $tokens[] = $crt_token; + $token_modifiers[] = $crt_token_modifier; + } - if (!empty($tokens)) + $clauses = array(); + for ($i=0; $i<count($tokens); $i++) { - $query = '('; - for ($i=0; $i<count($tokens); $i++) - { - if ($i>0) $query .= 'OR '; - $query .= ' '.$field.' LIKE "%'.$tokens[$i].'%" '; - } - $query .= ')'; - return $query; + $tokens[$i] = trim($tokens[$i], '%'); + if (strstr($token_modifiers[$i], '-')!==false) + continue; + if ( strlen($tokens[$i])==0) + continue; + $clauses[] = $field.' LIKE "%'.addslashes($tokens[$i]).'%"'; } - return null; + + return count($clauses) ? '('.implode(' OR ', $clauses).')' : null; } /** - * returns the search results (array of image ids) corresponding to a - * quick/query search. A quick/query search returns many items (search is - * not strict), but results are sorted by relevance. + * returns the search results corresponding to a quick/query search. + * A quick/query search returns many items (search is not strict), but results + * are sorted by relevance unless $page['super_order_by'] is set. Returns: + * array ( + * 'items' => array(85,68,79...) + * 'as_is' => 1 (indicates the caller that items are ordered and permissions checked + * 'qs' => array( + * 'matching_tags' => array(85,86) - matching tags + * 'matching_cats' => array(1,2,3) - matching categories + * 'matching_cats_no_images' =>array(99) - matching categories without images + * )) * * @param string q + * @param string images_where optional aditional restriction on images table * @return array */ -function get_quick_search_results($q) +function get_quick_search_results($q, $images_where='') { global $page; - $search_results = array(); + $search_results = + array( + 'items' => array(), + 'as_is' => 1, + 'qs' => array('q'=>stripslashes($q)), + ); $q = trim($q); if (empty($q)) { - $search_results['items'] = array(); return $search_results; } - // prepare the big join on images, comments and categories - $query = ' -SELECT - i.id, CAST( CONCAT_WS(" ", - IFNULL(i.name,""), - IFNULL(i.comment,""), - IFNULL(GROUP_CONCAT(DISTINCT co.content),""), - IFNULL(GROUP_CONCAT(DISTINCT c.dir),""), - IFNULL(GROUP_CONCAT(DISTINCT c.name),""), - IFNULL(GROUP_CONCAT(DISTINCT c.comment),"") ) AS CHAR) AS ft -FROM ( - ( - '.IMAGES_TABLE.' i LEFT JOIN '.COMMENTS_TABLE.' co on i.id=co.image_id - ) - INNER JOIN - '.IMAGE_CATEGORY_TABLE.' ic on ic.image_id=i.id - ) - INNER JOIN - '.CATEGORIES_TABLE.' c on c.id=ic.category_id -'.get_sql_condition_FandF - ( - array - ( - 'forbidden_categories' => 'category_id', - 'visible_categories' => 'category_id', - 'visible_images' => 'i.id' - ), - 'WHERE' - ).' -GROUP BY i.id'; + $q_like_field = '@@__db_field__@@'; //something never in a search + $q_like_clause = get_qsearch_like_clause($q, $q_like_field ); - $query = 'SELECT id, MATCH(ft) AGAINST( "'.$q.'" IN BOOLEAN MODE) AS q FROM ('.$query.') AS Y -WHERE MATCH(ft) AGAINST( "'.$q.'" IN BOOLEAN MODE)'; + + // Step 1 - first we find matches in #images table =========================== + $where_clauses='MATCH(i.name, i.comment) AGAINST( "'.$q.'" IN BOOLEAN MODE)'; + if (!empty($q_like_clause)) + { + $where_clauses .= ' + OR '. str_replace($q_like_field, 'file', $q_like_clause); + $where_clauses = '('.$where_clauses.')'; + } + $where_clauses = array($where_clauses); + if (!empty($images_where)) + { + $where_clauses[]='('.$images_where.')'; + } + $where_clauses[] .= get_sql_condition_FandF + ( + array( 'visible_images' => 'i.id' ), null, true + ); + $query = ' +SELECT i.id, + MATCH(i.name, i.comment) AGAINST( "'.$q.'" IN BOOLEAN MODE) AS weight + FROM '.IMAGES_TABLE.' i + WHERE '.implode("\n AND ", $where_clauses); $by_weights=array(); $result = pwg_query($query); while ($row = mysql_fetch_array($result)) { // weight is important when sorting images by relevance - if ($row['q']) + if ($row['weight']) { - $by_weights[(int)$row['id']] = 2*$row['q']; + $by_weights[(int)$row['id']] = 2*$row['weight']; } - } - - $permissions_checked = true; - // now search the file name separately (not done in full text because slower - // and the filename in pwg doesn't have spaces so full text is meaningless ) - $q_like_clause = get_qsearch_like_clause($q, 'file' ); - if (!empty($q_like_clause)) - { - $query = ' -SELECT id - FROM '.IMAGES_TABLE.' - WHERE '.$q_like_clause. - get_sql_condition_FandF - ( - array - ( - 'visible_images' => 'id' - ), - 'AND' - ); - $result = pwg_query($query); - while ($row = mysql_fetch_assoc($result)) - { // weight is important when sorting images by relevance - $id=(int)$row['id']; - @$by_weights[$id] += 2; - $permissions_checked = false; + else + {//full text does not match but file name match + $by_weights[(int)$row['id']] = 2; } } - // now search tag names corresponding to the query $q. we could have searched - // tags earlier during the big join, but for the sake of the performance and - // because tags have only a simple name we do it separately - $q_like_clause = get_qsearch_like_clause($q, 'CONVERT(name, CHAR)' ); + + // Step 2 - search tags corresponding to the query $q ======================== if (!empty($q_like_clause)) - { - // search also by url name (without accents) - $q_like_clause_url = get_qsearch_like_clause($q, 'url_name' ); + { // search name and url name (without accents) $query = ' SELECT id FROM '.TAGS_TABLE.' - WHERE '.$q_like_clause.' - OR '.$q_like_clause_url; + WHERE ('.str_replace($q_like_field, 'CONVERT(name, CHAR)', $q_like_clause).' + OR '.str_replace($q_like_field, 'url_name', $q_like_clause).')'; $tag_ids = array_from_query( $query, 'id'); if (!empty($tag_ids)) - { // we got some tags + { // we got some tags; get the images + $search_results['qs']['matching_tags']=$tag_ids; $query = ' -SELECT image_id, COUNT(tag_id) AS q +SELECT image_id, COUNT(tag_id) AS weight FROM '.IMAGE_TAG_TABLE.' WHERE tag_id IN ('.implode(',',$tag_ids).') GROUP BY image_id'; @@ -404,53 +448,95 @@ SELECT image_id, COUNT(tag_id) AS q while ($row = mysql_fetch_assoc($result)) { // weight is important when sorting images by relevance $image_id=(int)$row['image_id']; - @$by_weights[$image_id] += $row['q']; - $permissions_checked = false; + @$by_weights[$image_id] += $row['weight']; } } } - //at this point, found images might contain images not allowed for the user - if ( !$permissions_checked - and !empty($by_weights) - and !isset($page['super_order_by']) ) + + // Step 3 - search categories corresponding to the query $q ================== + global $user; + $query = ' +SELECT id, nb_images + FROM '.CATEGORIES_TABLE.' + INNER JOIN '.USER_CACHE_CATEGORIES_TABLE.' ON id=cat_id + WHERE user_id='.$user['id'].' + AND MATCH(name, comment) AGAINST( "'.$q.'" IN BOOLEAN MODE)'. + get_sql_condition_FandF ( + array( 'visible_categories' => 'cat_id' ), "\n AND" + ); + $result = pwg_query($query); + while ($row = mysql_fetch_assoc($result)) + { // weight is important when sorting images by relevance + if ($row['nb_images']==0) + { + $search_results['qs']['matching_cats_no_images'][] = $row['id']; + } + else + { + $search_results['qs']['matching_cats'][] = $row['id']; + } + } + + if ( empty($by_weights) and empty($search_results['qs']['matching_cats']) ) { - // before returning the result "as is", make sure the user has the - // permissions for every item - global $conf; - $query = ' + return $search_results; + } + + // Step 4 - now we have $by_weights ( array image id => weight ) that need + // permission checks and/or matching categories to get images from + $where_clauses = array(); + if ( !empty($by_weights) ) + { + $where_clauses[]='i.id IN (' + . implode(',', array_keys($by_weights)) . ')'; + } + if ( !empty($search_results['qs']['matching_cats']) ) + { + $where_clauses[]='category_id IN ('. + implode(',',$search_results['qs']['matching_cats']).')'; + } + $where_clauses = array( '('.implode("\n OR ",$where_clauses).')' ); + if (!empty($images_where)) + { + $where_clauses[]='('.$images_where.')'; + } + $where_clauses[] = get_sql_condition_FandF( + array + ( + 'forbidden_categories' => 'category_id', + 'visible_categories' => 'category_id', + 'visible_images' => 'i.id' + ), + null,true + ); + + global $conf; + $query = ' SELECT DISTINCT(id) - FROM '.IMAGES_TABLE.' + FROM '.IMAGES_TABLE.' i INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id - WHERE id IN ('.implode(',', array_keys($by_weights) ).') -'.get_sql_condition_FandF - ( - array - ( - 'forbidden_categories' => 'category_id', - 'visible_categories' => 'category_id', - 'visible_images' => 'id' - ), - 'AND' - ).' - '.$conf['order_by']; - $allowed_images = array_flip( array_from_query( $query, 'id') ); - $by_weights = array_intersect_key($by_weights, $allowed_images ); - $divisor = 4.0 * count($allowed_images); - // decrease weight from 0 to 0.25 corresponding to the order - foreach ($allowed_images as $id=>$rank ) - { - $by_weights[$id] -= $rank / $divisor; - } - $permissions_checked = true; + WHERE '.implode("\n AND ", $where_clauses)."\n". + $conf['order_by']; + + $allowed_images = array_from_query( $query, 'id'); + + if ( isset($page['super_order_by']) or empty($by_weights) ) + { + $search_results['items'] = $allowed_images; + return $search_results; } - arsort($by_weights, SORT_NUMERIC); - if ( $permissions_checked ) + + $allowed_images = array_flip( $allowed_images ); + $divisor = 5.0 * count($allowed_images); + foreach ($allowed_images as $id=>$rank ) { - $search_results['as_is']=1; + $weight = isset($by_weights[$id]) ? $by_weights[$id] : 1; + $weight -= $rank/$divisor; + $allowed_images[$id] = $weight; } - - $search_results['items'] = array_keys($by_weights); + arsort($allowed_images, SORT_NUMERIC); + $search_results['items'] = array_keys($allowed_images); return $search_results; } @@ -458,9 +544,10 @@ SELECT DISTINCT(id) * returns an array of 'items' corresponding to the search id * * @param int search id + * @param string images_where optional aditional restriction on images table * @return array */ -function get_search_results($search_id) +function get_search_results($search_id, $images_where='') { $search = get_search_array($search_id); if ( !isset($search['q']) ) @@ -470,7 +557,7 @@ function get_search_results($search_id) } else { - return get_quick_search_results($search['q']); + return get_quick_search_results($search['q'], $images_where); } } ?>
\ No newline at end of file |