From 2faa7204e94e7524930ea31d7f85d4b4d5b33346 Mon Sep 17 00:00:00 2001
From: plegall
Date: Thu, 17 Oct 2013 20:11:45 +0000
Subject: bug 2973 fixed: automatically use encoding windows-1252 instead of iso-8859-1 (unless we find utf-8 signs) on IPTC before converting to utf-8

git-svn-id: http://piwigo.org/svn/branches/2.5@24967 68402e56-0260-453c-a942-63ccdbb3a9ee
---
 include/functions_metadata.inc.php | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/include/functions_metadata.inc.php b/include/functions_metadata.inc.php
index 97724abc1..0727feb1e 100644
--- a/include/functions_metadata.inc.php
+++ b/include/functions_metadata.inc.php
@@ -102,9 +102,25 @@ function clean_iptc_value($value)
     $value = trigger_event('clean_iptc_value', $value);
     if ( ($qual = qualify_utf8($value)) != 0)
     {// has non ascii chars
-      $value = convert_charset( $value,
-        $qual>0 ? 'utf-8' : 'iso-8859-1',
-        get_pwg_charset() );
+      if ($qual>0)
+      {
+        $input_encoding = 'utf-8';
+      }
+      else
+      {
+        $input_encoding = 'iso-8859-1';
+        if (function_exists('iconv') or function_exists('mb_convert_encoding'))
+        {
+          // using windows-1252 because it supports additional characters
+          // such as "oe" in a single character (ligature). About the
+          // difference between Windows-1252 and ISO-8859-1: the characters
+          // 0x80-0x9F will not convert correctly. But these are control
+          // characters which are almost never used.
+          $input_encoding = 'windows-1252';
+        }
+      }
+
+      $value = convert_charset($value, $input_encoding, get_pwg_charset());
     }
   }
   return $value;
-- 
cgit v1.2.3