/**
 * Parse and validate a charset name string (see #1485758).
 *
 * Charset strings are sometimes malformed and there are many aliases,
 * but strict names are needed for charset conversion (especially by the
 * utf8 class). The name is upper-cased, reduced to letters and digits,
 * stripped of any leading "X" prefix and then mapped, in this order:
 *  - through the alias table,
 *  - to "UTF-<bits>[BE|LE]" when it contains UTF7/8/16/32,
 *  - to "ISO-8859-<n>" when it contains ISO8859 (ISO-8859-1 is reported
 *    as WINDOWS-1252).
 * Anything else is returned upper-cased with the "UNICODE-1-1-" prefix
 * removed, but otherwise unchanged.
 *
 * @param string $charset Input charset name
 * @return string The validated charset name
 */
function rcube_parse_charset($charset)
{
    $charset = strtoupper((string) $charset);

    // RFC1642: drop the prefix (e.g. UNICODE-1-1-UTF-7 => UTF-7)
    $charset = str_replace('UNICODE-1-1-', '', $charset);

    // Aliases: some of them from HTML5 spec.
    // Keys are in the normalized form computed below (A-Z and 0-9 only).
    $aliases = array(
        'USASCII'       => 'WINDOWS-1252',
        'ANSIX31101983' => 'WINDOWS-1252',
        'ANSIX341968'   => 'WINDOWS-1252',
        'UNKNOWN8BIT'   => 'ISO-8859-15',
        'UNKNOWN'       => 'ISO-8859-15',
        'USERDEFINED'   => 'ISO-8859-15',
        'KSC56011987'   => 'EUC-KR',
        'GB2312'        => 'GBK',
        'GB231280'      => 'GBK',
        'UNICODE'       => 'UTF-8',
        'UTF7IMAP'      => 'UTF7-IMAP',
        'TIS620'        => 'WINDOWS-874',
        'ISO88599'      => 'WINDOWS-1254',
        'ISO885911'     => 'WINDOWS-874',
    );

    // allow a-z and 0-9 only and remove X- prefix (e.g. X-ROMAN8 => ROMAN8)
    $str = preg_replace(array('/[^a-z0-9]/i', '/^x+/i'), '', $charset);

    // the alias lookup must come first: UTF7IMAP would otherwise be
    // caught by the generic UTF pattern below
    if (isset($aliases[$str])) {
        return $aliases[$str];
    }

    if (preg_match('/UTF(7|8|16|32)(BE|LE)?/', $str, $m)) {
        // the byte-order group is optional; when it did not match,
        // preg_match() leaves $m[2] unset
        $endian = isset($m[2]) ? $m[2] : '';
        return 'UTF-' . $m[1] . $endian;
    }

    if (preg_match('/ISO8859([0-9]{0,2})/', $str, $m)) {
        // a missing (or zero) part number defaults to part 1
        $iso = 'ISO-8859-' . ($m[1] ? $m[1] : 1);
        // some clients send windows-1252 text as latin1,
        // it is safe to use windows-1252 for all latin1
        return $iso == 'ISO-8859-1' ? 'WINDOWS-1252' : $iso;
    }

    return $charset;
}
+
+
/**
 * Converts string from standard UTF-7 (RFC 2152) to UTF-8.
 *
 * Text outside of shifted sequences is copied through unchanged.
 * A "+" starts a shifted sequence of (unpadded) base64 characters that
 * encode UTF-16 big-endian code units; the sequence is decoded with
 * rcube_utf16_to_utf8(). A "-" directly after the sequence is absorbed,
 * any other terminating character is kept as regular text. "+-" yields
 * a literal "+". A "+" followed by neither a base64 character nor "-"
 * is ill-formed and is discarded.
 *
 * @param string $str Input string
 * @return string The converted string
 */
function rcube_utf7_to_utf8($str)
{
    // base64 alphabet used inside shifted sequences
    $base64 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';

    $str = strval($str);
    $len = strlen($str);
    $res = '';
    $i   = 0;

    while ($i < $len) {
        $pos = strpos($str, '+', $i);

        // no more shifted sequences: the rest is plain text
        if ($pos === false) {
            $res .= substr($str, $i);
            break;
        }

        // plain text before the shift character
        $res .= substr($str, $i, $pos - $i);
        $i = $pos + 1;

        // length of the base64 run that follows the '+'
        $run = $i < $len ? strspn($str, $base64, $i) : 0;

        if ($run > 0) {
            $res .= rcube_utf16_to_utf8(base64_decode(substr($str, $i, $run)));
            $i += $run;

            // only '-' is absorbed as a terminator, every other
            // character ends the sequence and remains part of the text
            if ($i < $len && $str[$i] === '-') {
                $i++;
            }
        }
        else if ($i < $len && $str[$i] === '-') {
            // "+-" is the escape sequence for a literal plus sign
            $res .= '+';
            $i++;
        }
        // otherwise: stray '+' (ill-formed input), dropped
    }

    return $res;
}
+
/**
 * Converts string from UTF-16 to UTF-8 (helper for utf-7 to utf-8 conversion)
 *
 * The input is read as big-endian 16-bit code units without a byte order
 * mark. A high/low surrogate pair is combined into one code point and
 * written as a 4-byte UTF-8 sequence. A dangling trailing byte (odd input
 * length) is ignored. As before, U+0000 is written as the two bytes
 * C0 80 rather than a single NUL byte, and an unpaired surrogate is
 * written as a 3-byte sequence.
 *
 * @param string $str Input string (UTF-16, big-endian)
 * @return string The converted string
 */
function rcube_utf16_to_utf8($str)
{
    $str = (string) $str;
    $len = strlen($str);
    // an incomplete trailing code unit cannot be decoded
    $len -= $len % 2;
    $dec = '';

    for ($i = 0; $i < $len; $i += 2) {
        $c = (ord($str[$i]) << 8) | ord($str[$i + 1]);

        // high surrogate: combine with the following low surrogate, if any
        if ($c >= 0xD800 && $c <= 0xDBFF && $i + 3 < $len) {
            $low = (ord($str[$i + 2]) << 8) | ord($str[$i + 3]);
            if ($low >= 0xDC00 && $low <= 0xDFFF) {
                $c = 0x10000 + (($c - 0xD800) << 10) + ($low - 0xDC00);
                $i += 2;
            }
        }

        if ($c >= 0x0001 && $c <= 0x007F) {
            $dec .= chr($c);
        } else if ($c > 0xFFFF) {
            $dec .= chr(0xF0 | (($c >> 18) & 0x07));
            $dec .= chr(0x80 | (($c >> 12) & 0x3F));
            $dec .= chr(0x80 | (($c >> 6) & 0x3F));
            $dec .= chr(0x80 | ($c & 0x3F));
        } else if ($c > 0x07FF) {
            $dec .= chr(0xE0 | (($c >> 12) & 0x0F));
            $dec .= chr(0x80 | (($c >> 6) & 0x3F));
            $dec .= chr(0x80 | ($c & 0x3F));
        } else {
            // U+0080..U+07FF, and U+0000 (emitted as C0 80)
            $dec .= chr(0xC0 | (($c >> 6) & 0x1F));
            $dec .= chr(0x80 | ($c & 0x3F));
        }
    }

    return $dec;
}
+
+