a397debf02
bug 2973 fixed: automatically use encoding windows-1252 instead of iso-8859-1 (unless we find utf-8 signs) on IPTC before converting to utf-8 git-svn-id: http://piwigo.org/svn/trunk@24968 68402e56-0260-453c-a942-63ccdbb3a9ee
183 lines
No EOL
5.4 KiB
PHP
183 lines
No EOL
5.4 KiB
PHP
<?php
|
|
// +-----------------------------------------------------------------------+
|
|
// | Piwigo - a PHP based photo gallery |
|
|
// +-----------------------------------------------------------------------+
|
|
// | Copyright(C) 2008-2013 Piwigo Team http://piwigo.org |
|
|
// | Copyright(C) 2003-2008 PhpWebGallery Team http://phpwebgallery.net |
|
|
// | Copyright(C) 2002-2003 Pierrick LE GALL http://le-gall.net/pierrick |
|
|
// +-----------------------------------------------------------------------+
|
|
// | This program is free software; you can redistribute it and/or modify |
|
|
// | it under the terms of the GNU General Public License as published by |
|
|
// | the Free Software Foundation |
|
|
// | |
|
|
// | This program is distributed in the hope that it will be useful, but |
|
|
// | WITHOUT ANY WARRANTY; without even the implied warranty of |
|
|
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
|
// | General Public License for more details. |
|
|
// | |
|
|
// | You should have received a copy of the GNU General Public License |
|
|
// | along with this program; if not, write to the Free Software |
|
|
// | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, |
|
|
// | USA. |
|
|
// +-----------------------------------------------------------------------+
|
|
|
|
/**
 * Extracts IPTC metadata from an image file.
 *
 * $map associates each result key with an IPTC record identifier such as
 * '2#025'. Every mapped identifier found in the APP13 segment of the file is
 * cleaned with clean_iptc_value() and stored under all the result keys that
 * point to it.
 *
 * @param string $filename path of the image to inspect
 * @param array $map result key => IPTC record identifier
 * @return array cleaned values indexed by keys of $map; empty when the file
 *   is not readable as an image or carries no parsable APP13 segment
 */
function get_iptc_data($filename, $map)
{
  global $conf;

  $result = array();

  $imginfo = array();
  if (!@getimagesize($filename, $imginfo))
  {
    return $result;
  }

  if (!isset($imginfo['APP13']))
  {
    return $result;
  }

  $iptc = iptcparse($imginfo['APP13']);
  if (!is_array($iptc))
  {
    return $result;
  }

  // flipping the map turns every distinct IPTC identifier into a single key
  foreach (array_flip($map) as $iptc_key => $unused)
  {
    if (!isset($iptc[$iptc_key][0]))
    {
      continue;
    }

    // 2#025 is the only record whose entries are all kept (comma separated);
    // for any other record only the first entry is used
    $value = ($iptc_key == '2#025')
      ? implode(',', array_map('clean_iptc_value', $iptc[$iptc_key]))
      : clean_iptc_value($iptc[$iptc_key][0]);

    foreach (array_keys($map, $iptc_key) as $pwg_key)
    {
      // in case the origin of the photo is unsecure (user upload), HTML tags
      // are removed to avoid XSS (malicious execution of javascript)
      $result[$pwg_key] = $conf['allow_html_in_metadata']
        ? $value
        : strip_tags($value);
    }
  }

  return $result;
}
|
|
|
|
/**
 * Cleans a raw IPTC value: NUL bytes are removed or replaced and, when the
 * value holds bytes above 0x7f, it is converted to the gallery charset.
 *
 * @param string $value raw IPTC value
 * @return string cleaned value
 */
function clean_iptc_value($value)
{
  // drop the leading NUL bytes (weird Kodak Scanner software), then turn the
  // remaining binary nulls into spaces
  $value = ltrim($value, chr(0));
  $value = str_replace(chr(0x00), ' ', $value);

  if (!preg_match('/[\x80-\xff]/', $value))
  {
    // no byte in the 0x80-0xff range: nothing to convert
    return $value;
  }

  // apparently mac uses some MacRoman encoding. It is not detected here, so
  // plugins are given the opportunity to handle it through this event.
  $value = trigger_event('clean_iptc_value', $value);

  $qual = qualify_utf8($value);
  if ($qual == 0)
  {
    // reported as having no non ascii chars anymore
    return $value;
  }

  if ($qual > 0)
  {
    $input_encoding = 'utf-8';
  }
  elseif (function_exists('iconv') or function_exists('mb_convert_encoding'))
  {
    // windows-1252 is preferred over iso-8859-1 because it supports
    // additional characters such as "oe" in a single character (ligature).
    // The two encodings only differ on 0x80-0x9F, which iso-8859-1 assigns
    // to control characters that are almost never used.
    $input_encoding = 'windows-1252';
  }
  else
  {
    $input_encoding = 'iso-8859-1';
  }

  return convert_charset($value, $input_encoding, get_pwg_charset());
}
|
|
|
|
/**
 * Extracts EXIF metadata from a file.
 *
 * $map associates each result key with an EXIF field name. A field written
 * as "section;name" is looked up as $exif[section][name]; any other field is
 * looked up directly as $exif[field]. Fields missing from the file are left
 * out of the result.
 *
 * The script is terminated with die() when the exif extension is missing.
 *
 * @param string $filename path of the file to read
 * @param array $map result key => EXIF field name
 * @return array values found, indexed by keys of $map
 */
function get_exif_data($filename, $map)
{
  global $conf;

  $result = array();

  // read_exif_data() was only a deprecated alias of exif_read_data() and no
  // longer exists in PHP 8.0: rely on the canonical function so that a
  // loaded exif extension is detected on every PHP version.
  if (!function_exists('exif_read_data'))
  {
    die('Exif extension not available, admin should disable exif use');
  }

  // Read EXIF data
  if ($exif = @exif_read_data($filename))
  {
    // plugins may reformat the raw EXIF array before it is mapped
    $exif = trigger_event('format_exif_data', $exif, $filename, $map );
    foreach ($map as $key => $field)
    {
      if (strpos($field, ';') === false)
      {
        if (isset($exif[$field]))
        {
          $result[$key] = $exif[$field];
        }
      }
      else
      {
        // "section;name" addresses a value nested one level deep
        $tokens = explode(';', $field);
        if (isset($exif[$tokens[0]][$tokens[1]]))
        {
          $result[$key] = $exif[$tokens[0]][$tokens[1]];
        }
      }
    }
  }

  if (!$conf['allow_html_in_metadata'])
  {
    foreach ($result as $key => $value)
    {
      // in case the origin of the photo is unsecure (user upload), we remove
      // HTML tags to avoid XSS (malicious execution of javascript)
      // NOTE(review): $value is handed to strip_tags() as is; a mapped field
      // holding an array is not handled specifically - confirm whether such
      // fields can be mapped.
      $result[$key] = strip_tags($value);
    }
  }

  return $result;
}
|
|
?>
|