From b4793e959e06a1b06b05a06e3a4e2ac2b35a23e7 Mon Sep 17 00:00:00 2001 From: rvelices Date: Wed, 23 Jul 2008 00:56:53 +0000 Subject: - merge r2451 from trunk: normalize behaviour of query search versus std search (now both return items already sorted and permission checked); also more optimized sql queries (in some cases) git-svn-id: http://piwigo.org/svn/branches/branch-1_7@2452 68402e56-0260-453c-a942-63ccdbb3a9ee --- include/functions_search.inc.php | 438 +++++++++++++++++++++++++-------------- include/section_init.inc.php | 23 +- include/ws_functions.inc.php | 77 ++----- 3 files changed, 308 insertions(+), 230 deletions(-) diff --git a/include/functions_search.inc.php b/include/functions_search.inc.php index 0c2e61214..08a7a762b 100644 --- a/include/functions_search.inc.php +++ b/include/functions_search.inc.php @@ -197,9 +197,29 @@ function get_sql_search_clause($search) * @param array search * @return array */ -function get_regular_search_results($search) +function get_regular_search_results($search, $images_where) { + global $conf; + $forbidden = get_sql_condition_FandF( + array + ( + 'forbidden_categories' => 'category_id', + 'visible_categories' => 'category_id', + 'visible_images' => 'id' + ), + "\n AND" + ); + $items = array(); + $tag_items = array(); + + if (isset($search['fields']['tags'])) + { + $tag_items = get_image_ids_for_tags( + $search['fields']['tags']['words'], + $search['fields']['tags']['mode'] + ); + } $search_clause = get_sql_search_clause($search); @@ -207,26 +227,30 @@ function get_regular_search_results($search) { $query = ' SELECT DISTINCT(id) - FROM '.IMAGES_TABLE.' + FROM '.IMAGES_TABLE.' i INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id - WHERE '.$search_clause.' -;'; + WHERE '.$search_clause; + if (!empty($images_where)) + { + $query .= "\n AND ".$images_where; + } + if (empty($tag_items) or $search['mode']=='AND') + { // directly use forbidden and order by + $query .= $forbidden.' + '.$conf['order_by']; + } $items = array_from_query($query, 'id'); } - if (isset($search['fields']['tags'])) + if ( !empty($tag_items) ) { - $tag_items = get_image_ids_for_tags( - $search['fields']['tags']['words'], - $search['fields']['tags']['mode'] - ); - + $need_permission_check = false; switch ($search['mode']) { case 'AND': - { if (empty($search_clause)) { + $need_permission_check = true; $items = $tag_items; } else @@ -234,17 +258,34 @@ SELECT DISTINCT(id) $items = array_intersect($items, $tag_items); } break; - } case 'OR': - { + $before_count = count($items); $items = array_unique( array_merge( $items, $tag_items ) ); + if ( $before_count < count($items) ) + { + $need_permission_check = true; + } break; + } + if ($need_permission_check and count($items) ) + { + $query = ' +SELECT DISTINCT(id) + FROM '.IMAGES_TABLE.' i + INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id + WHERE id IN ('.implode(',', $items).') '.$forbidden; + if (!empty($images_where)) + { + $query .= "\n AND ".$images_where; } + $query .= ' + '.$conf['order_by']; + $items = array_from_query($query, 'id'); } } @@ -253,204 +294,288 @@ SELECT DISTINCT(id) /** * returns the LIKE sql clause corresponding to the quick search query $q - * and the field $field. example q="john bill", field="file" will return - * file LIKE "%john%" OR file LIKE "%bill%". Special characters for MySql - * full text search (+,<,>) are omitted. + * and the field $field. example q='john bill', field='file' will return + * file LIKE '%john%' OR file LIKE '%bill%'. Special characters for MySql full + * text search (+,<,>,~) are omitted. The query can contain a phrase: + * 'Pierre "New York"' will return LIKE '%Pierre%' OR LIKE '%New York%'. * @param string q * @param string field * @return string */ function get_qsearch_like_clause($q, $field) { - $tokens = preg_split('/[\s,.;!\?]+/', $q); - for ($i=0; $i]/',$tokens[$i]) ) - $tokens[$i]=substr($tokens[$i], 1); - else if (substr($tokens[$i], 0, 1)=='-') + $ch = $q[$i]; + switch ($state) { - unset($tokens[$i]); - $i--; + case 0: + if ($ch=='"') + { + if (strlen($crt_token)) + { + $tokens[] = $crt_token; + $token_modifiers[] = $crt_token_modifier; + $crt_token = ""; + $crt_token_modifier = ""; + } + $state=1; + } + elseif ( $ch=='*' ) + { // wild card + $crt_token .= '%'; + } + elseif ( strcspn($ch, '+-><~')==0 ) + { //special full text modifier + if (strlen($crt_token)) + { + $tokens[] = $crt_token; + $token_modifiers[] = $crt_token_modifier; + $crt_token = ""; + $crt_token_modifier = ""; + } + $crt_token_modifier .= $ch; + } + elseif (preg_match('/[\s,.;!\?]+/', $ch)) + { // white space + if (strlen($crt_token)) + { + $tokens[] = $crt_token; + $token_modifiers[] = $crt_token_modifier; + $crt_token = ""; + $crt_token_modifier = ""; + } + } + else + { + $crt_token .= $ch; + } + break; + case 1: // qualified with quotes + switch ($ch) + { + case '"': + $tokens[] = $crt_token; + $token_modifiers[] = $crt_token_modifier; + $crt_token = ""; + $crt_token_modifier = ""; + $state=0; + break; + default: + $crt_token .= $ch; + } + break; } } + if (strlen($crt_token)) + { + $tokens[] = $crt_token; + $token_modifiers[] = $crt_token_modifier; + } - if (!empty($tokens)) + $clauses = array(); + for ($i=0; $i0) $query .= 'OR '; - $query .= ' '.$field.' LIKE "%'.$tokens[$i].'%" '; - } - $query .= ')'; - return $query; + $tokens[$i] = trim($tokens[$i], '%'); + if (strstr($token_modifiers[$i], '-')!==false) + continue; + if ( strlen($tokens[$i])==0) + continue; + $clauses[] = $field.' LIKE "%'.addslashes($tokens[$i]).'%"'; } - return null; + + return count($clauses) ? '('.implode(' OR ', $clauses).')' : null; } /** - * returns the search results (array of image ids) corresponding to a - * quick/query search. A quick/query search returns many items (search is - * not strict), but results are sorted by relevance. + * returns the search results corresponding to a quick/query search. + * A quick/query search returns many items (search is not strict), but results + * are sorted by relevance unless $super_order_by is true. Returns: + * array ( + * 'items' => array(85,68,79...) + * 'qs' => array( + * 'matching_tags' => array of matching tags + * 'matching_cats' => array of matching categories + * 'matching_cats_no_images' =>array(99) - matching categories without images + * )) * * @param string q + * @param bool super_order_by + * @param string images_where optional aditional restriction on images table * @return array */ -function get_quick_search_results($q) +function get_quick_search_results($q, $super_order_by, $images_where='') { - global $page; - $search_results = array(); + $search_results = + array( + 'items' => array(), + 'qs' => array('q'=>stripslashes($q)), + ); $q = trim($q); if (empty($q)) { - $search_results['items'] = array(); return $search_results; } - // prepare the big join on images, comments and categories - $query = ' -SELECT - i.id, CAST( CONCAT_WS(" ", - IFNULL(i.name,""), - IFNULL(i.comment,""), - IFNULL(GROUP_CONCAT(DISTINCT co.content),""), - IFNULL(GROUP_CONCAT(DISTINCT c.dir),""), - IFNULL(GROUP_CONCAT(DISTINCT c.name),""), - IFNULL(GROUP_CONCAT(DISTINCT c.comment),"") ) AS CHAR) AS ft -FROM ( - ( - '.IMAGES_TABLE.' i LEFT JOIN '.COMMENTS_TABLE.' co on i.id=co.image_id - ) - INNER JOIN - '.IMAGE_CATEGORY_TABLE.' ic on ic.image_id=i.id - ) - INNER JOIN - '.CATEGORIES_TABLE.' c on c.id=ic.category_id -'.get_sql_condition_FandF - ( - array - ( - 'forbidden_categories' => 'category_id', - 'visible_categories' => 'category_id', - 'visible_images' => 'i.id' - ), - 'WHERE' - ).' -GROUP BY i.id'; + $q_like_field = '@@__db_field__@@'; //something never in a search + $q_like_clause = get_qsearch_like_clause($q, $q_like_field ); - $query = 'SELECT id, MATCH(ft) AGAINST( "'.$q.'" IN BOOLEAN MODE) AS q FROM ('.$query.') AS Y -WHERE MATCH(ft) AGAINST( "'.$q.'" IN BOOLEAN MODE)'; + + // Step 1 - first we find matches in #images table =========================== + $where_clauses='MATCH(i.name, i.comment) AGAINST( "'.$q.'" IN BOOLEAN MODE)'; + if (!empty($q_like_clause)) + { + $where_clauses .= ' + OR '. str_replace($q_like_field, 'file', $q_like_clause); + $where_clauses = '('.$where_clauses.')'; + } + $where_clauses = array($where_clauses); + if (!empty($images_where)) + { + $where_clauses[]='('.$images_where.')'; + } + $where_clauses[] .= get_sql_condition_FandF + ( + array( 'visible_images' => 'i.id' ), null, true + ); + $query = ' +SELECT i.id, + MATCH(i.name, i.comment) AGAINST( "'.$q.'" IN BOOLEAN MODE) AS weight + FROM '.IMAGES_TABLE.' i + WHERE '.implode("\n AND ", $where_clauses); $by_weights=array(); $result = pwg_query($query); while ($row = mysql_fetch_array($result)) { // weight is important when sorting images by relevance - if ($row['q']) + if ($row['weight']) { - $by_weights[(int)$row['id']] = 2*$row['q']; + $by_weights[(int)$row['id']] = 2*$row['weight']; } - } - - $permissions_checked = true; - // now search the file name separately (not done in full text because slower - // and the filename in pwg doesn't have spaces so full text is meaningless ) - $q_like_clause = get_qsearch_like_clause($q, 'file' ); - if (!empty($q_like_clause)) - { - $query = ' -SELECT id - FROM '.IMAGES_TABLE.' - WHERE '.$q_like_clause. - get_sql_condition_FandF - ( - array - ( - 'visible_images' => 'id' - ), - 'AND' - ); - $result = pwg_query($query); - while ($row = mysql_fetch_assoc($result)) - { // weight is important when sorting images by relevance - $id=(int)$row['id']; - @$by_weights[$id] += 2; - $permissions_checked = false; + else + {//full text does not match but file name match + $by_weights[(int)$row['id']] = 2; } } - // now search tag names corresponding to the query $q. we could have searched - // tags earlier during the big join, but for the sake of the performance and - // because tags have only a simple name we do it separately - $q_like_clause = get_qsearch_like_clause($q, 'CONVERT(name, CHAR)' ); + + // Step 2 - search tags corresponding to the query $q ======================== if (!empty($q_like_clause)) - { - // search also by url name (without accents) - $q_like_clause_url = get_qsearch_like_clause($q, 'url_name' ); + { // search name and url name (without accents) $query = ' -SELECT id +SELECT id, name, url_name FROM '.TAGS_TABLE.' - WHERE '.$q_like_clause.' - OR '.$q_like_clause_url; - $tag_ids = array_from_query( $query, 'id'); - if (!empty($tag_ids)) - { // we got some tags + WHERE ('.str_replace($q_like_field, 'CONVERT(name, CHAR)', $q_like_clause).' + OR '.str_replace($q_like_field, 'url_name', $q_like_clause).')'; + $tags = hash_from_query($query, 'id'); + if ( !empty($tags) ) + { // we got some tags; get the images + $search_results['qs']['matching_tags']=$tags; $query = ' -SELECT image_id, COUNT(tag_id) AS q +SELECT image_id, COUNT(tag_id) AS weight FROM '.IMAGE_TAG_TABLE.' - WHERE tag_id IN ('.implode(',',$tag_ids).') + WHERE tag_id IN ('.implode(',',array_keys($tags)).') GROUP BY image_id'; $result = pwg_query($query); while ($row = mysql_fetch_assoc($result)) { // weight is important when sorting images by relevance $image_id=(int)$row['image_id']; - @$by_weights[$image_id] += $row['q']; - $permissions_checked = false; + @$by_weights[$image_id] += $row['weight']; } } } - //at this point, found images might contain images not allowed for the user - if ( !$permissions_checked - and !empty($by_weights) - and !isset($page['super_order_by']) ) + + // Step 3 - search categories corresponding to the query $q ================== + global $user; + $query = ' +SELECT id, name, permalink, nb_images + FROM '.CATEGORIES_TABLE.' + INNER JOIN '.USER_CACHE_CATEGORIES_TABLE.' ON id=cat_id + WHERE user_id='.$user['id'].' + AND MATCH(name, comment) AGAINST( "'.$q.'" IN BOOLEAN MODE)'. + get_sql_condition_FandF ( + array( 'visible_categories' => 'cat_id' ), "\n AND" + ); + $result = pwg_query($query); + while ($row = mysql_fetch_assoc($result)) + { // weight is important when sorting images by relevance + if ($row['nb_images']==0) + { + $search_results['qs']['matching_cats_no_images'][] = $row; + } + else + { + $search_results['qs']['matching_cats'][$row['id']] = $row; + } + } + + if ( empty($by_weights) and empty($search_results['qs']['matching_cats']) ) { - // before returning the result "as is", make sure the user has the - // permissions for every item - global $conf; - $query = ' + return $search_results; + } + + // Step 4 - now we have $by_weights ( array image id => weight ) that need + // permission checks and/or matching categories to get images from + $where_clauses = array(); + if ( !empty($by_weights) ) + { + $where_clauses[]='i.id IN (' + . implode(',', array_keys($by_weights)) . ')'; + } + if ( !empty($search_results['qs']['matching_cats']) ) + { + $where_clauses[]='category_id IN ('. + implode(',',array_keys($search_results['qs']['matching_cats'])).')'; + } + $where_clauses = array( '('.implode("\n OR ",$where_clauses).')' ); + if (!empty($images_where)) + { + $where_clauses[]='('.$images_where.')'; + } + $where_clauses[] = get_sql_condition_FandF( + array + ( + 'forbidden_categories' => 'category_id', + 'visible_categories' => 'category_id', + 'visible_images' => 'i.id' + ), + null,true + ); + + global $conf; + $query = ' SELECT DISTINCT(id) - FROM '.IMAGES_TABLE.' + FROM '.IMAGES_TABLE.' i INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id - WHERE id IN ('.implode(',', array_keys($by_weights) ).') -'.get_sql_condition_FandF - ( - array - ( - 'forbidden_categories' => 'category_id', - 'visible_categories' => 'category_id', - 'visible_images' => 'id' - ), - 'AND' - ).' - '.$conf['order_by']; - $allowed_images = array_flip( array_from_query( $query, 'id') ); - $by_weights = array_intersect_key($by_weights, $allowed_images ); - $divisor = 4.0 * count($allowed_images); - // decrease weight from 0 to 0.25 corresponding to the order - foreach ($allowed_images as $id=>$rank ) - { - $by_weights[$id] -= $rank / $divisor; - } - $permissions_checked = true; + WHERE '.implode("\n AND ", $where_clauses)."\n". + $conf['order_by']; + + $allowed_images = array_from_query( $query, 'id'); + + if ( $super_order_by or empty($by_weights) ) + { + $search_results['items'] = $allowed_images; + return $search_results; } - arsort($by_weights, SORT_NUMERIC); - if ( $permissions_checked ) + + $allowed_images = array_flip( $allowed_images ); + $divisor = 5.0 * count($allowed_images); + foreach ($allowed_images as $id=>$rank ) { - $search_results['as_is']=1; + $weight = isset($by_weights[$id]) ? $by_weights[$id] : 1; + $weight -= $rank/$divisor; + $allowed_images[$id] = $weight; } - - $search_results['items'] = array_keys($by_weights); + arsort($allowed_images, SORT_NUMERIC); + $search_results['items'] = array_keys($allowed_images); return $search_results; } @@ -458,19 +583,20 @@ SELECT DISTINCT(id) * returns an array of 'items' corresponding to the search id * * @param int search id + * @param string images_where optional aditional restriction on images table * @return array */ -function get_search_results($search_id) +function get_search_results($search_id, $super_order_by, $images_where='') { $search = get_search_array($search_id); if ( !isset($search['q']) ) { - $result['items'] = get_regular_search_results($search); + $result['items'] = get_regular_search_results($search, $images_where); return $result; } else { - return get_quick_search_results($search['q']); + return get_quick_search_results($search['q'], $super_order_by, $images_where); } } ?> \ No newline at end of file diff --git a/include/section_init.inc.php b/include/section_init.inc.php index 6f03cb193..fb232d883 100644 --- a/include/section_init.inc.php +++ b/include/section_init.inc.php @@ -336,27 +336,16 @@ SELECT DISTINCT(image_id) { include_once( PHPWG_ROOT_PATH .'include/functions_search.inc.php' ); - $search_result = get_search_results($page['search']); - if ( !empty($search_result['items']) and !isset($search_result['as_is']) ) - { - $query = ' -SELECT DISTINCT(id) - FROM '.IMAGES_TABLE.' - INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id - WHERE id IN ('.implode(',', $search_result['items']).') - '.$forbidden.' - '.$conf['order_by'].' -;'; - $page['items'] = array_from_query($query, 'id'); - } - else - { - $page['items'] = $search_result['items']; + $search_result = get_search_results($page['search'], @$page['super_order_by'] ); + if ( isset($search_result['qs']) ) + {//save the details of the query search + $page['qsearch_details'] = $search_result['qs']; } $page = array_merge( $page, array( + 'items' => $search_result['items'], 'title' => '' .l10n('search_result').'', ) @@ -378,7 +367,7 @@ SELECT image_id ( array ( - 'visible_images' => 'image_id' + 'visible_images' => 'id' ), 'AND' ).' diff --git a/include/ws_functions.inc.php b/include/ws_functions.inc.php index 8c6458f58..026f48cf4 100644 --- a/include/ws_functions.inc.php +++ b/include/ws_functions.inc.php @@ -759,68 +759,35 @@ function ws_images_search($params, &$service) include_once( PHPWG_ROOT_PATH .'include/functions_search.inc.php' ); include_once(PHPWG_ROOT_PATH.'include/functions_picture.inc.php'); - $where_clauses = ws_std_image_sql_filter( $params ); - $order_by = ws_std_image_sql_order($params); + $where_clauses = ws_std_image_sql_filter( $params, 'i.' ); + $order_by = ws_std_image_sql_order($params, 'i.'); - if ( !empty($where_clauses) and !empty($order_by) ) + $super_order_by = false; + if ( !empty($order_by) ) { - $page['super_order_by']=1; // quick_search_result might be faster + global $conf; + $conf['order_by'] = 'ORDER BY '.$order_by; + $super_order_by=true; // quick_search_result might be faster } - $search_result = get_quick_search_results($params['query']); - global $image_ids; //needed for sorting by rank (usort) - if ( ( !isset($search_result['as_is']) - or !empty($where_clauses) - or !empty($order_by) ) - and !empty($search_result['items']) ) - { - $where_clauses[] = 'id IN (' - .wordwrap(implode(', ', $search_result['items']), 80, "\n") - .')'; - $where_clauses[] = get_sql_condition_FandF( - array - ( - 'forbidden_categories' => 'category_id', - 'visible_categories' => 'category_id', - 'visible_images' => 'id' - ), - '', true - ); - $query = ' -SELECT DISTINCT id FROM '.IMAGES_TABLE.' INNER JOIN '.IMAGE_CATEGORY_TABLE.' ON id=image_id - WHERE '.implode(' - AND ', $where_clauses); - if (!empty($order_by)) - { - $query .= ' - ORDER BY '.$order_by; - } - $image_ids = array_from_query($query, 'id'); - global $ranks; - $ranks = array_flip( $search_result['items'] ); - usort( - $image_ids, - create_function('$i1,$i2', 'global $ranks; return $ranks[$i1]-$ranks[$i2];') + $search_result = get_quick_search_results($params['query'], + $super_order_by, + implode(',', $where_clauses) ); - unset ($ranks); - } - else - { - $image_ids = $search_result['items']; - } - $image_ids = array_slice($image_ids, - $params['page']*$params['per_page'], - $params['per_page'] ); + $image_ids = array_slice( + $search_result['items'], + $params['page']*$params['per_page'], + $params['per_page'] + ); if ( count($image_ids) ) { $query = ' SELECT * FROM '.IMAGES_TABLE.' - WHERE id IN (' - .wordwrap(implode(', ', $image_ids), 80, "\n") - .')'; + WHERE id IN ('.implode(',', $image_ids).')'; + $image_ids = array_flip($image_ids); $result = pwg_query($query); while ($row = mysql_fetch_assoc($result)) { @@ -837,14 +804,10 @@ SELECT * FROM '.IMAGES_TABLE.' $image[$k] = $row[$k]; } $image = array_merge( $image, ws_std_get_urls($row) ); - array_push($images, $image); + $images[$image_ids[$image['id']]] = $image; } - - $image_ids = array_flip($image_ids); - usort( - $images, - create_function('$i1,$i2', 'global $image_ids; return $image_ids[$i1["id"]]-$image_ids[$i2["id"]];') - ); + ksort($images, SORT_NUMERIC); + $images = array_values($images); } -- cgit v1.2.3