+/**
+ * Parse and validate a charset name string (see #1485758).
+ *
+ * Charset labels are sometimes malformed and there are also charset aliases,
+ * but strict names are needed for charset conversion (especially utf8 class).
+ * The input is upper-cased, cleaned of known junk (leading non-alphanumeric
+ * characters, everything from the first "$", the "UNICODE-1-1" marker and
+ * a leading "X-") and then resolved through an alias table followed by
+ * pattern rules for UTF, ISO-8859, WINDOWS and LATIN names.
+ * Results are memoized per input string in a static array.
+ *
+ * @param string Input charset name
+ * @return string|null The validated charset name, null for "BINARY"
+ */
+function rcube_parse_charset($input)
+{
+    static $charsets = array();
+
+    // memoized result for this exact input string
+    if (isset($charsets[$input]))
+        return $charsets[$input];
+
+    $charset = strtoupper($input);
+
+    $charset = preg_replace(array(
+        '/^[^0-9A-Z]+/',    // e.g. _ISO-8859-JP$SIO
+        '/\$.*$/',          // e.g. _ISO-8859-JP$SIO
+        '/UNICODE-1-1-*/',  // RFC1641/1642
+        '/^X-/',            // X- prefix (e.g. X-ROMAN8 => ROMAN8)
+    ), '', $charset);
+
+    if ($charset == 'BINARY')
+        return $charsets[$input] = null;
+
+    // Aliases: some of them from HTML5 spec.
+    // Keys are compared against the name reduced to A-Z and 0-9 only.
+    $aliases = array(
+        'USASCII'       => 'WINDOWS-1252',
+        'ANSIX31101983' => 'WINDOWS-1252',
+        'ANSIX341968'   => 'WINDOWS-1252',
+        'UNKNOWN8BIT'   => 'ISO-8859-15',
+        'UNKNOWN'       => 'ISO-8859-15',
+        'USERDEFINED'   => 'ISO-8859-15',
+        'KSC56011987'   => 'EUC-KR',
+        'GB2312'        => 'GBK',
+        'GB231280'      => 'GBK',
+        'UNICODE'       => 'UTF-8',
+        'UTF7IMAP'      => 'UTF7-IMAP',
+        'TIS620'        => 'WINDOWS-874',
+        'ISO88599'      => 'WINDOWS-1254',
+        'ISO885911'     => 'WINDOWS-874',
+        'MACROMAN'      => 'MACINTOSH',
+        '77'            => 'MAC',
+        '128'           => 'SHIFT-JIS',
+        '129'           => 'CP949',
+        '130'           => 'CP1361',
+        '134'           => 'GBK',
+        '136'           => 'BIG5',
+        '161'           => 'WINDOWS-1253',
+        '162'           => 'WINDOWS-1254',
+        '163'           => 'WINDOWS-1258',
+        '177'           => 'WINDOWS-1255',
+        '178'           => 'WINDOWS-1256',
+        '186'           => 'WINDOWS-1257',
+        '204'           => 'WINDOWS-1251',
+        '222'           => 'WINDOWS-874',
+        '238'           => 'WINDOWS-1250',
+        'MS950'         => 'CP950',
+        'WINDOWS949'    => 'UHC',
+    );
+
+    // allow A-Z and 0-9 only
+    $str = preg_replace('/[^A-Z0-9]/', '', $charset);
+
+    if (isset($aliases[$str])) {
+        $result = $aliases[$str];
+    }
+    // UTF
+    else if (preg_match('/U[A-Z][A-Z](7|8|16|32)(BE|LE)*/', $str, $m)) {
+        // the byte-order group is optional; when it does not take part in
+        // the match preg_match() leaves $m[2] unset, so do not read it blindly
+        $result = 'UTF-' . $m[1] . (isset($m[2]) ? $m[2] : '');
+    }
+    // ISO-8859
+    else if (preg_match('/ISO8859([0-9]{0,2})/', $str, $m)) {
+        $iso = 'ISO-8859-' . ($m[1] ? $m[1] : 1);
+        // some clients send windows-1252 text as latin1,
+        // it is safe to use windows-1252 for all latin1
+        $result = $iso == 'ISO-8859-1' ? 'WINDOWS-1252' : $iso;
+    }
+    // handle broken charset names e.g. WINDOWS-1250HTTP-EQUIVCONTENT-TYPE
+    else if (preg_match('/(WIN|WINDOWS)([0-9]+)/', $str, $m)) {
+        $result = 'WINDOWS-' . $m[2];
+    }
+    // LATIN
+    else if (preg_match('/LATIN(.*)/', $str, $m)) {
+        // LATIN suffix => ISO-8859 part number
+        $latin = array('2' => 2, '3' => 3, '4' => 4, '5' => 9, '6' => 10,
+            '7' => 13, '8' => 14, '9' => 15, '10' => 16,
+            'ARABIC' => 6, 'CYRILLIC' => 5, 'GREEK' => 7, 'GREEK1' => 7, 'HEBREW' => 8);
+
+        // some clients send windows-1252 text as latin1,
+        // it is safe to use windows-1252 for all latin1
+        // (loose comparison kept as in the original code)
+        if ($m[1] == 1) {
+            $result = 'WINDOWS-1252';
+        }
+        // if iconv is not supported we need ISO labels, it's also safe for iconv
+        else if (!empty($latin[$m[1]])) {
+            $result = 'ISO-8859-' . $latin[$m[1]];
+        }
+        // iconv requires conversion of e.g. LATIN-1 to LATIN1
+        else {
+            $result = $str;
+        }
+    }
+    else {
+        $result = $charset;
+    }
+
+    $charsets[$input] = $result;
+
+    return $result;
+}
+
+
+/**
+ * Converts string from standard UTF-7 (RFC 2152) to UTF-8.
+ *
+ * Characters outside a shifted sequence are copied unchanged. A "+" starts
+ * a shifted sequence: the following run of base64 characters is decoded
+ * as UTF-16 (big-endian) and converted with rcube_utf16_to_utf8().
+ * A "-" that ends the run is absorbed, any other terminating character
+ * is processed normally. "+-" yields a literal "+".
+ *
+ * @param string Input string
+ * @return string The converted string
+ */
+function rcube_utf7_to_utf8($str)
+{
+    // Base64 alphabet of RFC 2152 (set B), including "/"
+    $base64 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
+
+    $str = strval($str);
+    $len = strlen($str);
+    $res = '';
+    $i   = 0;
+
+    while ($i < $len) {
+        $u7 = $str[$i++];
+
+        // directly encoded character
+        if ($u7 != '+') {
+            $res .= $u7;
+            continue;
+        }
+
+        // length of the base64 run following the shift character;
+        // bytes >= 0x80 simply end the run
+        $run = $i < $len ? strspn($str, $base64, $i) : 0;
+        $ch  = $run > 0 ? substr($str, $i, $run) : '';
+        $i  += $run;
+
+        // character that terminated the run (null at end of input)
+        $end = $i < $len ? $str[$i] : null;
+
+        if ($ch == '') {
+            // "+-" is the escape for a literal plus sign
+            if ($end === '-')
+                $res .= '+';
+            // ill-formed "+X": the shift and the following character are
+            // dropped, as this function has always done
+            $i++;
+            continue;
+        }
+
+        $res .= rcube_utf16_to_utf8(base64_decode($ch));
+
+        // only "-" is absorbed, other terminators are regular input
+        if ($end === '-')
+            $i++;
+    }
+
+    return $res;
+}
+
+/**
+ * Converts string from UTF-16 to UTF-8 (helper for utf-7 to utf-8 conversion)
+ *
+ * The input is read as big-endian 16-bit units. A high surrogate followed
+ * by a low surrogate is combined into one 4-byte UTF-8 sequence. A dangling
+ * odd byte at the end of the input is ignored.
+ *
+ * @param string Input string
+ * @return string The converted string
+ */
+function rcube_utf16_to_utf8($str)
+{
+    $len = strlen($str);
+    $dec = '';
+
+    // stop before a dangling odd byte, it cannot form a 16-bit unit
+    for ($i = 0; $i + 1 < $len; $i += 2) {
+        $c = ord($str[$i]) << 8 | ord($str[$i + 1]);
+
+        // surrogate pair => single code point above U+FFFF
+        if ($c >= 0xD800 && $c <= 0xDBFF && $i + 3 < $len) {
+            $low = ord($str[$i + 2]) << 8 | ord($str[$i + 3]);
+
+            if ($low >= 0xDC00 && $low <= 0xDFFF) {
+                $c = 0x10000 + (($c - 0xD800) << 10) + ($low - 0xDC00);
+
+                $dec .= chr(0xF0 | ($c >> 18));
+                $dec .= chr(0x80 | (($c >> 12) & 0x3F));
+                $dec .= chr(0x80 | (($c >> 6) & 0x3F));
+                $dec .= chr(0x80 | ($c & 0x3F));
+
+                // skip the low surrogate as well
+                $i += 2;
+                continue;
+            }
+        }
+
+        if ($c >= 0x0001 && $c <= 0x007F) {
+            $dec .= chr($c);
+        } else if ($c > 0x07FF) {
+            // also reached by an unpaired surrogate, encoded as-is
+            $dec .= chr(0xE0 | (($c >> 12) & 0x0F));
+            $dec .= chr(0x80 | (($c >> 6) & 0x3F));
+            $dec .= chr(0x80 | (($c >> 0) & 0x3F));
+        } else {
+            // U+0080..U+07FF, and U+0000 which is written as two bytes
+            $dec .= chr(0xC0 | (($c >> 6) & 0x1F));
+            $dec .= chr(0x80 | (($c >> 0) & 0x3F));
+        }
+    }
+
+    return $dec;
+}
+
+