-function rcmail_html_filter($html)
- {
- preg_match_all('/<\/?\w+((\s+\w+(\s*=\s*(?:".*?"|\'.*?\'|[^\'">\s]+))?)+\s*|\s*)\/?>/', $html, $tags);
-
- /* From Squirrelmail: Translate all dangerous Unicode or Shift_JIS characters which are accepted by
- * IE as regular characters. */
- $replace = array(array('ʟ', 'ʟ', /* L UNICODE IPA Extension */
- 'ʀ', 'ʀ', /* R UNICODE IPA Extension */
- 'ɴ', 'ɴ', /* N UNICODE IPA Extension */
- 'E', 'E', /* Unicode FULLWIDTH LATIN CAPITAL LETTER E */
- 'e', 'e', /* Unicode FULLWIDTH LATIN SMALL LETTER E */
- 'X', 'X', /* Unicode FULLWIDTH LATIN CAPITAL LETTER X */
- 'x', 'x', /* Unicode FULLWIDTH LATIN SMALL LETTER X */
- 'P', 'P', /* Unicode FULLWIDTH LATIN CAPITAL LETTER P */
- 'p', 'p', /* Unicode FULLWIDTH LATIN SMALL LETTER P */
- 'R', 'R', /* Unicode FULLWIDTH LATIN CAPITAL LETTER R */
- 'r', 'r', /* Unicode FULLWIDTH LATIN SMALL LETTER R */
- 'S', 'S', /* Unicode FULLWIDTH LATIN CAPITAL LETTER S */
- 's', 's', /* Unicode FULLWIDTH LATIN SMALL LETTER S */
- 'I', 'I', /* Unicode FULLWIDTH LATIN CAPITAL LETTER I */
- 'i', 'i', /* Unicode FULLWIDTH LATIN SMALL LETTER I */
- 'O', 'O', /* Unicode FULLWIDTH LATIN CAPITAL LETTER O */
- 'o', 'o', /* Unicode FULLWIDTH LATIN SMALL LETTER O */
- 'N', 'N', /* Unicode FULLWIDTH LATIN CAPITAL LETTER N */
- 'n', 'n', /* Unicode FULLWIDTH LATIN SMALL LETTER N */
- 'L', 'L', /* Unicode FULLWIDTH LATIN CAPITAL LETTER L */
- 'l', 'l', /* Unicode FULLWIDTH LATIN SMALL LETTER L */
- 'U', 'U', /* Unicode FULLWIDTH LATIN CAPITAL LETTER U */
- 'u', 'u', /* Unicode FULLWIDTH LATIN SMALL LETTER U */
- 'ⁿ', 'ⁿ' , /* Unicode SUPERSCRIPT LATIN SMALL LETTER N */
- "\xEF\xBC\xA5", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER E */
- /* in unicode this is some Chinese char range */
- "\xEF\xBD\x85", /* Shift JIS FULLWIDTH LATIN SMALL LETTER E */
- "\xEF\xBC\xB8", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER X */
- "\xEF\xBD\x98", /* Shift JIS FULLWIDTH LATIN SMALL LETTER X */
- "\xEF\xBC\xB0", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER P */
- "\xEF\xBD\x90", /* Shift JIS FULLWIDTH LATIN SMALL LETTER P */
- "\xEF\xBC\xB2", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER R */
- "\xEF\xBD\x92", /* Shift JIS FULLWIDTH LATIN SMALL LETTER R */
- "\xEF\xBC\xB3", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER S */
- "\xEF\xBD\x93", /* Shift JIS FULLWIDTH LATIN SMALL LETTER S */
- "\xEF\xBC\xA9", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER I */
- "\xEF\xBD\x89", /* Shift JIS FULLWIDTH LATIN SMALL LETTER I */
- "\xEF\xBC\xAF", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER O */
- "\xEF\xBD\x8F", /* Shift JIS FULLWIDTH LATIN SMALL LETTER O */
- "\xEF\xBC\xAE", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER N */
- "\xEF\xBD\x8E", /* Shift JIS FULLWIDTH LATIN SMALL LETTER N */
- "\xEF\xBC\xAC", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER L */
- "\xEF\xBD\x8C", /* Shift JIS FULLWIDTH LATIN SMALL LETTER L */
- "\xEF\xBC\xB5", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER U */
- "\xEF\xBD\x95", /* Shift JIS FULLWIDTH LATIN SMALL LETTER U */
- "\xE2\x81\xBF", /* Shift JIS FULLWIDTH SUPERSCRIPT N */
- "\xCA\x9F", /* L UNICODE IPA Extension */
- "\xCA\x80", /* R UNICODE IPA Extension */
- "\xC9\xB4"), /* N UNICODE IPA Extension */
- array('l', 'l', 'r', 'r', 'n', 'n', 'E', 'E', 'e', 'e', 'X', 'X', 'x', 'x',
- 'P', 'P', 'p', 'p', 'R', 'R', 'r', 'r', 'S', 'S', 's', 's', 'I', 'I',
- 'i', 'i', 'O', 'O', 'o', 'o', 'N', 'N', 'n', 'n', 'L', 'L', 'l', 'l',
- 'U', 'U', 'u', 'u', 'n', 'n', 'E', 'e', 'X', 'x', 'P', 'p', 'R', 'r',
- 'S', 's', 'I', 'i', 'O', 'o', 'N', 'n', 'L', 'l', 'U', 'u', 'n', 'l', 'r', 'n'));
- if ((count($tags)>3) && (count($tags[3])>0))
- foreach ($tags[3] as $nr=>$value)
- {
- /* Remove comments */
- $newvalue = preg_replace('/(\/\*.*\*\/)/','$2',$value);
- /* Translate dangerous characters */
- $newvalue = str_replace($replace[0], $replace[1], $newvalue);
- sq_defang($newvalue);
- /* Rename dangerous CSS */
- $newvalue = preg_replace('/expression/i', 'idiocy', $newvalue);
- $newvalue = preg_replace('/url/i', 'idiocy', $newvalue);
- $newattrs = preg_replace('/'.preg_quote($value, '/').'$/', $newvalue, $tags[1][$nr]);
- $newtag = preg_replace('/'.preg_quote($tags[1][$nr], '/').'/', $newattrs, $tags[0][$nr]);
- $html = preg_replace('/'.preg_quote($tags[0][$nr], '/').'/', $newtag, $html);
- }
- return $html;
+ // charset was converted to UTF-8 in rcube_imap::get_message_part(),
+ // -> change charset specification in HTML accordingly
+ $charset_pattern = '(<meta\s+[^>]*content=)[\'"]?(\w+\/\w+;\s*charset=)([a-z0-9-_]+[\'"]?)';
+ if (preg_match("/$charset_pattern/Ui", $html)) {
+ $html = preg_replace("/$charset_pattern/i", '\\1"\\2'.RCMAIL_CHARSET.'"', $html);
+ }
+ else {
+ // add meta content-type to malformed messages, washtml cannot work without that
+ if (!preg_match('/<head[^>]*>(.*)<\/head>/Uims', $html))
+ $html = '<head></head>'. $html;
+ $html = substr_replace($html, '<meta http-equiv="Content-Type" content="text/html; charset='.RCMAIL_CHARSET.'" />', intval(stripos($html, '<head>')+6), 0);
+ }
+ // turn relative into absolute urls
+ $html = rcmail_resolve_base($html);
+
+ // clean HTML with washtml by Frederic Motte
+ $wash_opts = array(
+ 'show_washed' => false,
+ 'allow_remote' => $p['safe'],
+ 'blocked_src' => "./program/blocked.gif",
+ 'charset' => RCMAIL_CHARSET,
+ 'cid_map' => $cid_replaces,
+ 'html_elements' => array('body'),
+ );
+
+ if (!$p['inline_html']) {
+ $wash_opts['html_elements'] = array('html','head','title','body');
+ }
+ if ($p['safe']) {
+ $wash_opts['html_elements'][] = 'link';
+ $wash_opts['html_attribs'] = array('rel','type');