- merge r2451 from trunk: normalize behaviour of query search versus std search (now both return items already sorted and permission checked); also more optimized sql queries (in some cases)

git-svn-id: http://piwigo.org/svn/branches/branch-1_7@2452 68402e56-0260-453c-a942-63ccdbb3a9ee
This commit is contained in:
rvelices 2008-07-23 00:56:53 +00:00
parent be13718838
commit b4793e959e
3 changed files with 313 additions and 235 deletions

View file

@ -197,22 +197,21 @@ function get_sql_search_clause($search)
* @param array search
* @return array
*/
function get_regular_search_results($search)
function get_regular_search_results($search, $images_where)
{
global $conf;
$forbidden = get_sql_condition_FandF(
array
(
'forbidden_categories' => 'category_id',
'visible_categories' => 'category_id',
'visible_images' => 'id'
),
"\n AND"
);
$items = array();
$search_clause = get_sql_search_clause($search);
if (!empty($search_clause))
{
$query = '
SELECT DISTINCT(id)
FROM '.IMAGES_TABLE.'
INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
WHERE '.$search_clause.'
;';
$items = array_from_query($query, 'id');
}
$tag_items = array();
if (isset($search['fields']['tags']))
{
@ -220,13 +219,38 @@ SELECT DISTINCT(id)
$search['fields']['tags']['words'],
$search['fields']['tags']['mode']
);
}
$search_clause = get_sql_search_clause($search);
if (!empty($search_clause))
{
$query = '
SELECT DISTINCT(id)
FROM '.IMAGES_TABLE.' i
INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
WHERE '.$search_clause;
if (!empty($images_where))
{
$query .= "\n AND ".$images_where;
}
if (empty($tag_items) or $search['mode']=='AND')
{ // directly use forbidden and order by
$query .= $forbidden.'
'.$conf['order_by'];
}
$items = array_from_query($query, 'id');
}
if ( !empty($tag_items) )
{
$need_permission_check = false;
switch ($search['mode'])
{
case 'AND':
{
if (empty($search_clause))
{
$need_permission_check = true;
$items = $tag_items;
}
else
@ -234,17 +258,34 @@ SELECT DISTINCT(id)
$items = array_intersect($items, $tag_items);
}
break;
}
case 'OR':
{
$before_count = count($items);
$items = array_unique(
array_merge(
$items,
$tag_items
)
);
if ( $before_count < count($items) )
{
$need_permission_check = true;
}
break;
}
if ($need_permission_check and count($items) )
{
$query = '
SELECT DISTINCT(id)
FROM '.IMAGES_TABLE.' i
INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
WHERE id IN ('.implode(',', $items).') '.$forbidden;
if (!empty($images_where))
{
$query .= "\n AND ".$images_where;
}
$query .= '
'.$conf['order_by'];
$items = array_from_query($query, 'id');
}
}
@ -253,204 +294,288 @@ SELECT DISTINCT(id)
/**
* returns the LIKE sql clause corresponding to the quick search query $q
* and the field $field. example q="john bill", field="file" will return
* file LIKE "%john%" OR file LIKE "%bill%". Special characters for MySql
* full text search (+,<,>) are omitted.
* and the field $field. example q='john bill', field='file' will return
* file LIKE '%john%' OR file LIKE '%bill%'. Special characters for MySql full
* text search (+,<,>,~) are omitted. The query can contain a phrase:
* 'Pierre "New York"' will return LIKE '%Pierre%' OR LIKE '%New York%'.
* @param string q
* @param string field
* @return string
*/
function get_qsearch_like_clause($q, $field)
{
$tokens = preg_split('/[\s,.;!\?]+/', $q);
for ($i=0; $i<count($tokens); $i++)
$q = stripslashes($q);
$tokens = array();
$token_modifiers = array();
$crt_token = "";
$crt_token_modifier = "";
$state = 0;
for ($i=0; $i<strlen($q); $i++)
{
$tokens[$i]=str_replace('*','%', $tokens[$i]);
if (preg_match('/^[+<>]/',$tokens[$i]) )
$tokens[$i]=substr($tokens[$i], 1);
else if (substr($tokens[$i], 0, 1)=='-')
$ch = $q[$i];
switch ($state)
{
unset($tokens[$i]);
$i--;
case 0:
if ($ch=='"')
{
if (strlen($crt_token))
{
$tokens[] = $crt_token;
$token_modifiers[] = $crt_token_modifier;
$crt_token = "";
$crt_token_modifier = "";
}
$state=1;
}
elseif ( $ch=='*' )
{ // wild card
$crt_token .= '%';
}
elseif ( strcspn($ch, '+-><~')==0 )
{ //special full text modifier
if (strlen($crt_token))
{
$tokens[] = $crt_token;
$token_modifiers[] = $crt_token_modifier;
$crt_token = "";
$crt_token_modifier = "";
}
$crt_token_modifier .= $ch;
}
elseif (preg_match('/[\s,.;!\?]+/', $ch))
{ // white space
if (strlen($crt_token))
{
$tokens[] = $crt_token;
$token_modifiers[] = $crt_token_modifier;
$crt_token = "";
$crt_token_modifier = "";
}
}
else
{
$crt_token .= $ch;
}
break;
case 1: // qualified with quotes
switch ($ch)
{
case '"':
$tokens[] = $crt_token;
$token_modifiers[] = $crt_token_modifier;
$crt_token = "";
$crt_token_modifier = "";
$state=0;
break;
default:
$crt_token .= $ch;
}
break;
}
}
if (strlen($crt_token))
{
$tokens[] = $crt_token;
$token_modifiers[] = $crt_token_modifier;
}
if (!empty($tokens))
$clauses = array();
for ($i=0; $i<count($tokens); $i++)
{
$query = '(';
for ($i=0; $i<count($tokens); $i++)
{
if ($i>0) $query .= 'OR ';
$query .= ' '.$field.' LIKE "%'.$tokens[$i].'%" ';
}
$query .= ')';
return $query;
$tokens[$i] = trim($tokens[$i], '%');
if (strstr($token_modifiers[$i], '-')!==false)
continue;
if ( strlen($tokens[$i])==0)
continue;
$clauses[] = $field.' LIKE "%'.addslashes($tokens[$i]).'%"';
}
return null;
return count($clauses) ? '('.implode(' OR ', $clauses).')' : null;
}
/**
* returns the search results (array of image ids) corresponding to a
* quick/query search. A quick/query search returns many items (search is
* not strict), but results are sorted by relevance.
* returns the search results corresponding to a quick/query search.
* A quick/query search returns many items (search is not strict), but results
* are sorted by relevance unless $super_order_by is true. Returns:
* array (
* 'items' => array(85,68,79...)
* 'qs' => array(
* 'matching_tags' => array of matching tags
* 'matching_cats' => array of matching categories
* 'matching_cats_no_images' =>array(99) - matching categories without images
* ))
*
* @param string q
* @param bool super_order_by
* @param string images_where optional aditional restriction on images table
* @return array
*/
function get_quick_search_results($q)
function get_quick_search_results($q, $super_order_by, $images_where='')
{
global $page;
$search_results = array();
$search_results =
array(
'items' => array(),
'qs' => array('q'=>stripslashes($q)),
);
$q = trim($q);
if (empty($q))
{
$search_results['items'] = array();
return $search_results;
}
// prepare the big join on images, comments and categories
$query = '
SELECT
i.id, CAST( CONCAT_WS(" ",
IFNULL(i.name,""),
IFNULL(i.comment,""),
IFNULL(GROUP_CONCAT(DISTINCT co.content),""),
IFNULL(GROUP_CONCAT(DISTINCT c.dir),""),
IFNULL(GROUP_CONCAT(DISTINCT c.name),""),
IFNULL(GROUP_CONCAT(DISTINCT c.comment),"") ) AS CHAR) AS ft
FROM (
(
'.IMAGES_TABLE.' i LEFT JOIN '.COMMENTS_TABLE.' co on i.id=co.image_id
)
INNER JOIN
'.IMAGE_CATEGORY_TABLE.' ic on ic.image_id=i.id
)
INNER JOIN
'.CATEGORIES_TABLE.' c on c.id=ic.category_id
'.get_sql_condition_FandF
(
array
(
'forbidden_categories' => 'category_id',
'visible_categories' => 'category_id',
'visible_images' => 'i.id'
),
'WHERE'
).'
GROUP BY i.id';
$q_like_field = '@@__db_field__@@'; //something never in a search
$q_like_clause = get_qsearch_like_clause($q, $q_like_field );
$query = 'SELECT id, MATCH(ft) AGAINST( "'.$q.'" IN BOOLEAN MODE) AS q FROM ('.$query.') AS Y
WHERE MATCH(ft) AGAINST( "'.$q.'" IN BOOLEAN MODE)';
// Step 1 - first we find matches in #images table ===========================
$where_clauses='MATCH(i.name, i.comment) AGAINST( "'.$q.'" IN BOOLEAN MODE)';
if (!empty($q_like_clause))
{
$where_clauses .= '
OR '. str_replace($q_like_field, 'file', $q_like_clause);
$where_clauses = '('.$where_clauses.')';
}
$where_clauses = array($where_clauses);
if (!empty($images_where))
{
$where_clauses[]='('.$images_where.')';
}
$where_clauses[] .= get_sql_condition_FandF
(
array( 'visible_images' => 'i.id' ), null, true
);
$query = '
SELECT i.id,
MATCH(i.name, i.comment) AGAINST( "'.$q.'" IN BOOLEAN MODE) AS weight
FROM '.IMAGES_TABLE.' i
WHERE '.implode("\n AND ", $where_clauses);
$by_weights=array();
$result = pwg_query($query);
while ($row = mysql_fetch_array($result))
{ // weight is important when sorting images by relevance
if ($row['q'])
if ($row['weight'])
{
$by_weights[(int)$row['id']] = 2*$row['q'];
$by_weights[(int)$row['id']] = 2*$row['weight'];
}
else
{//full text does not match but file name match
$by_weights[(int)$row['id']] = 2;
}
}
$permissions_checked = true;
// now search the file name separately (not done in full text because slower
// and the filename in pwg doesn't have spaces so full text is meaningless )
$q_like_clause = get_qsearch_like_clause($q, 'file' );
if (!empty($q_like_clause))
{
$query = '
SELECT id
FROM '.IMAGES_TABLE.'
WHERE '.$q_like_clause.
get_sql_condition_FandF
(
array
(
'visible_images' => 'id'
),
'AND'
);
$result = pwg_query($query);
while ($row = mysql_fetch_assoc($result))
{ // weight is important when sorting images by relevance
$id=(int)$row['id'];
@$by_weights[$id] += 2;
$permissions_checked = false;
}
}
// now search tag names corresponding to the query $q. we could have searched
// tags earlier during the big join, but for the sake of the performance and
// because tags have only a simple name we do it separately
$q_like_clause = get_qsearch_like_clause($q, 'CONVERT(name, CHAR)' );
// Step 2 - search tags corresponding to the query $q ========================
if (!empty($q_like_clause))
{
// search also by url name (without accents)
$q_like_clause_url = get_qsearch_like_clause($q, 'url_name' );
{ // search name and url name (without accents)
$query = '
SELECT id
SELECT id, name, url_name
FROM '.TAGS_TABLE.'
WHERE '.$q_like_clause.'
OR '.$q_like_clause_url;
$tag_ids = array_from_query( $query, 'id');
if (!empty($tag_ids))
{ // we got some tags
WHERE ('.str_replace($q_like_field, 'CONVERT(name, CHAR)', $q_like_clause).'
OR '.str_replace($q_like_field, 'url_name', $q_like_clause).')';
$tags = hash_from_query($query, 'id');
if ( !empty($tags) )
{ // we got some tags; get the images
$search_results['qs']['matching_tags']=$tags;
$query = '
SELECT image_id, COUNT(tag_id) AS q
SELECT image_id, COUNT(tag_id) AS weight
FROM '.IMAGE_TAG_TABLE.'
WHERE tag_id IN ('.implode(',',$tag_ids).')
WHERE tag_id IN ('.implode(',',array_keys($tags)).')
GROUP BY image_id';
$result = pwg_query($query);
while ($row = mysql_fetch_assoc($result))
{ // weight is important when sorting images by relevance
$image_id=(int)$row['image_id'];
@$by_weights[$image_id] += $row['q'];
$permissions_checked = false;
@$by_weights[$image_id] += $row['weight'];
}
}
}
//at this point, found images might contain images not allowed for the user
if ( !$permissions_checked
and !empty($by_weights)
and !isset($page['super_order_by']) )
{
// before returning the result "as is", make sure the user has the
// permissions for every item
global $conf;
$query = '
SELECT DISTINCT(id)
FROM '.IMAGES_TABLE.'
INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
WHERE id IN ('.implode(',', array_keys($by_weights) ).')
'.get_sql_condition_FandF
(
array
(
'forbidden_categories' => 'category_id',
'visible_categories' => 'category_id',
'visible_images' => 'id'
),
'AND'
).'
'.$conf['order_by'];
$allowed_images = array_flip( array_from_query( $query, 'id') );
$by_weights = array_intersect_key($by_weights, $allowed_images );
$divisor = 4.0 * count($allowed_images);
// decrease weight from 0 to 0.25 corresponding to the order
foreach ($allowed_images as $id=>$rank )
// Step 3 - search categories corresponding to the query $q ==================
global $user;
$query = '
SELECT id, name, permalink, nb_images
FROM '.CATEGORIES_TABLE.'
INNER JOIN '.USER_CACHE_CATEGORIES_TABLE.' ON id=cat_id
WHERE user_id='.$user['id'].'
AND MATCH(name, comment) AGAINST( "'.$q.'" IN BOOLEAN MODE)'.
get_sql_condition_FandF (
array( 'visible_categories' => 'cat_id' ), "\n AND"
);
$result = pwg_query($query);
while ($row = mysql_fetch_assoc($result))
{ // weight is important when sorting images by relevance
if ($row['nb_images']==0)
{
$by_weights[$id] -= $rank / $divisor;
$search_results['qs']['matching_cats_no_images'][] = $row;
}
else
{
$search_results['qs']['matching_cats'][$row['id']] = $row;
}
$permissions_checked = true;
}
arsort($by_weights, SORT_NUMERIC);
if ( $permissions_checked )
if ( empty($by_weights) and empty($search_results['qs']['matching_cats']) )
{
$search_results['as_is']=1;
return $search_results;
}
$search_results['items'] = array_keys($by_weights);
// Step 4 - now we have $by_weights ( array image id => weight ) that need
// permission checks and/or matching categories to get images from
$where_clauses = array();
if ( !empty($by_weights) )
{
$where_clauses[]='i.id IN ('
. implode(',', array_keys($by_weights)) . ')';
}
if ( !empty($search_results['qs']['matching_cats']) )
{
$where_clauses[]='category_id IN ('.
implode(',',array_keys($search_results['qs']['matching_cats'])).')';
}
$where_clauses = array( '('.implode("\n OR ",$where_clauses).')' );
if (!empty($images_where))
{
$where_clauses[]='('.$images_where.')';
}
$where_clauses[] = get_sql_condition_FandF(
array
(
'forbidden_categories' => 'category_id',
'visible_categories' => 'category_id',
'visible_images' => 'i.id'
),
null,true
);
global $conf;
$query = '
SELECT DISTINCT(id)
FROM '.IMAGES_TABLE.' i
INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
WHERE '.implode("\n AND ", $where_clauses)."\n".
$conf['order_by'];
$allowed_images = array_from_query( $query, 'id');
if ( $super_order_by or empty($by_weights) )
{
$search_results['items'] = $allowed_images;
return $search_results;
}
$allowed_images = array_flip( $allowed_images );
$divisor = 5.0 * count($allowed_images);
foreach ($allowed_images as $id=>$rank )
{
$weight = isset($by_weights[$id]) ? $by_weights[$id] : 1;
$weight -= $rank/$divisor;
$allowed_images[$id] = $weight;
}
arsort($allowed_images, SORT_NUMERIC);
$search_results['items'] = array_keys($allowed_images);
return $search_results;
}
@ -458,19 +583,20 @@ SELECT DISTINCT(id)
* returns an array of 'items' corresponding to the search id
*
* @param int search id
* @param string images_where optional aditional restriction on images table
* @return array
*/
function get_search_results($search_id)
function get_search_results($search_id, $super_order_by, $images_where='')
{
$search = get_search_array($search_id);
if ( !isset($search['q']) )
{
$result['items'] = get_regular_search_results($search);
$result['items'] = get_regular_search_results($search, $images_where);
return $result;
}
else
{
return get_quick_search_results($search['q']);
return get_quick_search_results($search['q'], $super_order_by, $images_where);
}
}
?>

View file

@ -336,27 +336,16 @@ SELECT DISTINCT(image_id)
{
include_once( PHPWG_ROOT_PATH .'include/functions_search.inc.php' );
$search_result = get_search_results($page['search']);
if ( !empty($search_result['items']) and !isset($search_result['as_is']) )
{
$query = '
SELECT DISTINCT(id)
FROM '.IMAGES_TABLE.'
INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
WHERE id IN ('.implode(',', $search_result['items']).')
'.$forbidden.'
'.$conf['order_by'].'
;';
$page['items'] = array_from_query($query, 'id');
}
else
{
$page['items'] = $search_result['items'];
$search_result = get_search_results($page['search'], @$page['super_order_by'] );
if ( isset($search_result['qs']) )
{//save the details of the query search
$page['qsearch_details'] = $search_result['qs'];
}
$page = array_merge(
$page,
array(
'items' => $search_result['items'],
'title' => '<a href="'.duplicate_index_url(array('start'=>0)).'">'
.l10n('search_result').'</a>',
)
@ -378,7 +367,7 @@ SELECT image_id
(
array
(
'visible_images' => 'image_id'
'visible_images' => 'id'
),
'AND'
).'

View file

@ -759,68 +759,35 @@ function ws_images_search($params, &$service)
include_once( PHPWG_ROOT_PATH .'include/functions_search.inc.php' );
include_once(PHPWG_ROOT_PATH.'include/functions_picture.inc.php');
$where_clauses = ws_std_image_sql_filter( $params );
$order_by = ws_std_image_sql_order($params);
$where_clauses = ws_std_image_sql_filter( $params, 'i.' );
$order_by = ws_std_image_sql_order($params, 'i.');
if ( !empty($where_clauses) and !empty($order_by) )
$super_order_by = false;
if ( !empty($order_by) )
{
$page['super_order_by']=1; // quick_search_result might be faster
global $conf;
$conf['order_by'] = 'ORDER BY '.$order_by;
$super_order_by=true; // quick_search_result might be faster
}
$search_result = get_quick_search_results($params['query']);
global $image_ids; //needed for sorting by rank (usort)
if ( ( !isset($search_result['as_is'])
or !empty($where_clauses)
or !empty($order_by) )
and !empty($search_result['items']) )
{
$where_clauses[] = 'id IN ('
.wordwrap(implode(', ', $search_result['items']), 80, "\n")
.')';
$where_clauses[] = get_sql_condition_FandF(
array
(
'forbidden_categories' => 'category_id',
'visible_categories' => 'category_id',
'visible_images' => 'id'
),
'', true
);
$query = '
SELECT DISTINCT id FROM '.IMAGES_TABLE.' INNER JOIN '.IMAGE_CATEGORY_TABLE.' ON id=image_id
WHERE '.implode('
AND ', $where_clauses);
if (!empty($order_by))
{
$query .= '
ORDER BY '.$order_by;
}
$image_ids = array_from_query($query, 'id');
global $ranks;
$ranks = array_flip( $search_result['items'] );
usort(
$image_ids,
create_function('$i1,$i2', 'global $ranks; return $ranks[$i1]-$ranks[$i2];')
$search_result = get_quick_search_results($params['query'],
$super_order_by,
implode(',', $where_clauses)
);
unset ($ranks);
}
else
{
$image_ids = $search_result['items'];
}
$image_ids = array_slice($image_ids,
$params['page']*$params['per_page'],
$params['per_page'] );
$image_ids = array_slice(
$search_result['items'],
$params['page']*$params['per_page'],
$params['per_page']
);
if ( count($image_ids) )
{
$query = '
SELECT * FROM '.IMAGES_TABLE.'
WHERE id IN ('
.wordwrap(implode(', ', $image_ids), 80, "\n")
.')';
WHERE id IN ('.implode(',', $image_ids).')';
$image_ids = array_flip($image_ids);
$result = pwg_query($query);
while ($row = mysql_fetch_assoc($result))
{
@ -837,14 +804,10 @@ SELECT * FROM '.IMAGES_TABLE.'
$image[$k] = $row[$k];
}
$image = array_merge( $image, ws_std_get_urls($row) );
array_push($images, $image);
$images[$image_ids[$image['id']]] = $image;
}
$image_ids = array_flip($image_ids);
usort(
$images,
create_function('$i1,$i2', 'global $image_ids; return $image_ids[$i1["id"]]-$image_ids[$i2["id"]];')
);
ksort($images, SORT_NUMERIC);
$images = array_values($images);
}