"/[\n\t]+/", // Newlines and tabs
'/<script[^>]*>.*?<\/script>/i', // <script>s -- which strip_tags supposedly has problems with
//'/<!-- .* -->/', // Comments -- which strip_tags might have a problem with
- '/<a href="([^"]+)"[^>]*>(.+?)<\/a>/ie', // <a href="">
+ '/<a [^>]*href=("|\')([^"\']+)\1[^>]*>(.+?)<\/a>/ie', // <a href="">
'/<h[123][^>]*>(.+?)<\/h[123]>/ie', // H1 - H3
'/<h[456][^>]*>(.+?)<\/h[456]>/ie', // H4 - H6
'/<p[^>]*>/i', // <P>
'/(<table[^>]*>|<\/table>)/i', // <table> and </table>
'/(<tr[^>]*>|<\/tr>)/i', // <tr> and </tr>
'/<td[^>]*>(.+?)<\/td>/i', // <td> and </td>
- '/<th[^>]*>(.+?)<\/th>/i', // <th> and </th>
+ '/<th[^>]*>(.+?)<\/th>/ie', // <th> and </th>
'/ /i',
'/"/i',
'/>/i',
'/</i',
- '/&/i',
+ '/&(amp|#38);/i',
'/©/i',
'/™/i',
'/“/',
'/”/',
'/–/',
- '/’/',
- '/&/',
+ '/&#(8217|39);/',
'/©/',
'/™/',
'/—/',
'', // Non-legal carriage return
' ', // Newlines and tabs
'', // <script>s -- which strip_tags supposedly has problems with
- //'', // Comments -- which strip_tags might have problem a with
- '$this->_build_link_list("\\1", "\\2")', // <a href="">
+ //'', // Comments -- which strip_tags might have a problem with
+ '$this->_build_link_list("\\2", "\\3")', // <a href="">
"strtoupper(\"\n\n\\1\n\n\")", // H1 - H3
- "ucwords(\"\n\n\\1\n\n\")", // H4 - H6
- "\n\n\t", // <P>
+ "ucwords(\"\n\n\\1\n\")", // H4 - H6
+ "\n\n", // <P>
"\n", // <br>
'strtoupper("\\1")', // <b>
'_\\1_', // <i>
'"',
'-',
"'",
- '&',
'(c)',
'(tm)',
'--',
* @see _build_link_list()
*/
var $_link_list = array();
+
+ /**
+ * Boolean flag, true if a table of link URLs should be listed after the text.
+ *
+ * @var boolean $_do_links
+ * @access private
+ * @see html2text()
+ */
+ var $_do_links = true;
/**
* Constructor.
*
* @param string $source HTML content
* @param boolean $from_file Indicates $source is a file to pull content from
+ * @param boolean $produce_link_table Indicates whether a table of link URLs should be produced
* @access public
* @return void
*/
- function html2text( $source = '', $from_file = false )
+ function html2text( $source = '', $from_file = false, $produce_link_table = true )
{
// Hand the content to set_html() only when a non-empty source was supplied
// (note that empty() also treats the string '0' as empty).
if ( !empty($source) ) {
$this->set_html($source, $from_file);
}
// Called without arguments; presumably establishes a default base URL --
// TODO confirm against set_base_url().
$this->set_base_url();
+ $this->_do_links = $produce_link_table; // when false, _build_link_list() returns the link text unchanged
}
/**
*/
function _build_link_list($link, $display)
{
+ if (! $this->_do_links) return $display;
+
$link_lc = strtolower($link);
if (substr($link_lc, 0, 7) == 'http://' || substr($link_lc, 0, 8) == 'https://' || substr($link_lc, 0, 7) == 'mailto:')