4 +-----------------------------------------------------------------------+
5 | program/include/rcube_spellchecker.php |
7 | This file is part of the Roundcube Webmail client |
8 | Copyright (C) 2011, Kolab Systems AG |
9 | Copyright (C) 2008-2011, The Roundcube Dev Team |
10 | Licensed under the GNU GPL |
13 | Spellchecking using different backends |
15 +-----------------------------------------------------------------------+
16 | Author: Aleksander Machniak <machniak@kolabsys.com> |
17 | Author: Thomas Bruederli <roundcube@gmail.com> |
18 +-----------------------------------------------------------------------+
20 $Id: rcube_spellchecker.php 5181 2011-09-06 13:39:45Z alec $
26 * Helper class for spellchecking with Googiespell and PSpell support.
30 class rcube_spellchecker
32 private $matches = array();
37 private $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.]([^\w]|$)/';
38 private $options = array();
44 const GOOGLE_HOST = 'ssl://www.google.com';
45 const GOOGLE_PORT = 443;
46 const MAX_SUGGESTIONS = 10;
52 * @param string $lang Language code
54 function __construct($lang = 'en')
56 $this->rc = rcmail::get_instance();
57 $this->engine = $this->rc->config->get('spellcheck_engine', 'googie');
58 $this->lang = $lang ? $lang : 'en';
60 if ($this->engine == 'pspell' && !extension_loaded('pspell')) {
62 'code' => 500, 'type' => 'php',
63 'file' => __FILE__, 'line' => __LINE__,
64 'message' => "Pspell extension not available"), true, true);
67 $this->options = array(
68 'ignore_syms' => $this->rc->config->get('spellcheck_ignore_syms'),
69 'ignore_nums' => $this->rc->config->get('spellcheck_ignore_nums'),
70 'ignore_caps' => $this->rc->config->get('spellcheck_ignore_caps'),
71 'dictionary' => $this->rc->config->get('spellcheck_dictionary'),
/**
 * Stores the given content and runs the configured engine over it.
 *
 * @param string $text    Text content for spellchecking
 * @param bool   $is_html When true, the content is converted from HTML
 *                        to plain text before checking
 *
 * @return bool True when no misspelling was found, otherwise false
 */
function check($text, $is_html = false)
{
    // the engines operate on plain text
    $this->content = $is_html ? $this->html2text($text) : $text;

    // pick the backend selected in the configuration
    $this->matches = $this->engine == 'pspell'
        ? $this->_pspell_check($this->content)
        : $this->_googie_check($this->content);

    return $this->found() == 0;
}
106 * Number of misspellings found (after check)
108 * @return int Number of misspellings
112 return count($this->matches);
/**
 * Returns suggestions for the specified word, using the configured engine.
 *
 * @param string $word The word
 *
 * @return array Suggestions list
 */
function get_suggestions($word)
{
    return $this->engine == 'pspell'
        ? $this->_pspell_suggestions($word)
        : $this->_googie_suggestions($word);
}
/**
 * Returns misspelled words, using the configured engine.
 *
 * @param string $text    The content for spellchecking. If empty, the content
 *                        used for the check() method will be used.
 * @param bool   $is_html Passed through to the engine-specific implementation
 *
 * @return array List of misspelled words
 */
function get_words($text = null, $is_html=false)
{
    return $this->engine == 'pspell'
        ? $this->_pspell_words($text, $is_html)
        : $this->_googie_words($text, $is_html);
}
152 * Returns checking result in XML (Googiespell) format
154 * @return string XML content
159 $out = '<?xml version="1.0" encoding="'.RCMAIL_CHARSET.'"?><spellresult charschecked="'.mb_strlen($this->content).'">';
161 foreach ($this->matches as $item) {
162 $out .= '<c o="'.$item[1].'" l="'.$item[2].'">';
163 $out .= is_array($item[4]) ? implode("\t", $item[4]) : $item[4];
167 $out .= '</spellresult>';
174 * Returns checking result (misspelled words with suggestions)
176 * @return array Spellchecking result. An array indexed by word.
182 foreach ($this->matches as $item) {
183 if ($this->engine == 'pspell') {
187 $word = mb_substr($this->content, $item[1], $item[2], RCMAIL_CHARSET);
189 $result[$word] = is_array($item[4]) ? implode("\t", $item[4]) : $item[4];
197 * Returns error message
199 * @return string Error message
208 * Checks the text using pspell
210 * @param string $text Text content for spellchecking
212 private function _pspell_check($text)
215 $this->_pspell_init();
222 $text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
227 foreach ($text as $w) {
229 $pos = $w[1] - $diff;
230 $len = mb_strlen($word);
233 if ($this->is_exception($word)) {
235 else if (!pspell_check($this->plink, $word)) {
236 $suggestions = pspell_suggest($this->plink, $word);
238 if (sizeof($suggestions) > self::MAX_SUGGESTIONS)
239 $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
241 $matches[] = array($word, $pos, $len, null, $suggestions);
244 $diff += (strlen($word) - $len);
252 * Returns the misspelled words
254 private function _pspell_words($text = null, $is_html=false)
260 $this->_pspell_init();
266 // With PSpell we don't need to get suggestions to return misspelled words
268 $text = $this->html2text($text);
271 $text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
273 foreach ($text as $w) {
277 if ($this->is_exception($word)) {
281 if (!pspell_check($this->plink, $word)) {
289 foreach ($this->matches as $m) {
/**
 * Returns suggestions for a misspelled word (pspell engine).
 *
 * @param string $word The word
 *
 * @return array At most MAX_SUGGESTIONS suggestions; an empty array when the
 *               dictionary could not be loaded or pspell returned no list
 */
private function _pspell_suggestions($word)
{
    // init spellchecker
    $this->_pspell_init();

    // without a dictionary link there is nothing to ask pspell for
    if (!$this->plink) {
        return array();
    }

    $suggestions = pspell_suggest($this->plink, $word);

    // pspell_suggest() does not always return an array, so the type has to
    // be verified before the result is counted or sliced
    if (!is_array($suggestions)) {
        return array();
    }

    // array_slice() returns the whole list when it is shorter than the limit
    return array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
}
319 * Initializes PSpell dictionary
321 private function _pspell_init()
324 $this->plink = pspell_new($this->lang, null, null, RCMAIL_CHARSET, PSPELL_FAST);
328 $this->error = "Unable to load Pspell engine for selected language";
333 private function _googie_check($text)
335 // spell check uri is configured
336 $url = $this->rc->config->get('spellcheck_uri');
339 $a_uri = parse_url($url);
340 $ssl = ($a_uri['scheme'] == 'https' || $a_uri['scheme'] == 'ssl');
341 $port = $a_uri['port'] ? $a_uri['port'] : ($ssl ? 443 : 80);
342 $host = ($ssl ? 'ssl://' : '') . $a_uri['host'];
343 $path = $a_uri['path'] . ($a_uri['query'] ? '?'.$a_uri['query'] : '') . $this->lang;
346 $host = self::GOOGLE_HOST;
347 $port = self::GOOGLE_PORT;
348 $path = '/tbproxy/spell?lang=' . $this->lang;
351 // Google has some problem with spaces, use \n instead
352 $gtext = str_replace(' ', "\n", $text);
354 $gtext = '<?xml version="1.0" encoding="utf-8" ?>'
355 .'<spellrequest textalreadyclipped="0" ignoredups="0" ignoredigits="1" ignoreallcaps="1">'
356 .'<text>' . $gtext . '</text>'
360 if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) {
361 $out = "POST $path HTTP/1.0\r\n";
362 $out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n";
363 $out .= "Content-Length: " . strlen($gtext) . "\r\n";
364 $out .= "Content-Type: application/x-www-form-urlencoded\r\n";
365 $out .= "Connection: Close\r\n\r\n";
370 $store .= fgets($fp, 128);
375 $this->error = "Empty result from spelling engine";
378 preg_match_all('/<c o="([^"]*)" l="([^"]*)" s="([^"]*)">([^<]*)<\/c>/', $store, $matches, PREG_SET_ORDER);
380 // skip exceptions (if appropriate options are enabled)
381 if (!empty($this->options['ignore_syms']) || !empty($this->options['ignore_nums'])
382 || !empty($this->options['ignore_caps']) || !empty($this->options['dictionary'])
384 foreach ($matches as $idx => $m) {
385 $word = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET);
387 if ($this->is_exception($word)) {
388 unset($matches[$idx]);
397 private function _googie_words($text = null, $is_html=false)
401 $text = $this->html2text($text);
404 $matches = $this->_googie_check($text);
407 $matches = $this->matches;
408 $text = $this->content;
413 foreach ($matches as $m) {
414 $result[] = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET);
/**
 * Returns suggestions for a misspelled word (Googiespell engine).
 *
 * @param string $word The word to check; when empty, the matches stored by
 *                     the previous check are used instead
 *
 * @return array At most MAX_SUGGESTIONS suggestions, empty when there are none
 */
private function _googie_suggestions($word)
{
    if ($word) {
        $matches = $this->_googie_check($word);
    }
    else {
        $matches = $this->matches;
    }

    // only the first match is used; its 5th field holds the
    // tab-separated suggestions returned by the service
    if (empty($matches[0][4])) {
        return array();
    }

    $suggestions = explode("\t", $matches[0][4]);

    // the limit is a class constant and must be referenced through self::,
    // a bare MAX_SUGGESTIONS would be looked up as a global constant
    return array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
}
/**
 * Converts HTML content to plain text using the html2text class.
 *
 * @param string $text HTML content
 *
 * @return string Result of html2text::get_text()
 */
private function html2text($text)
{
    $converter = new html2text($text, false, true, 0);

    return $converter->get_text();
}
451 * Check if the specified word is an exception according to
452 * spellcheck options.
454 * @param string $word The word
456 * @return bool True if the word is an exception, False otherwise
458 public function is_exception($word)
460 // Contain only symbols (e.g. "+9,0", "2:2")
461 if (!$word || preg_match('/^[0-9@#$%^&_+~*=:;?!,.-]+$/', $word))
464 // Contain symbols (e.g. "g@@gle"), all symbols excluding separators
465 if (!empty($this->options['ignore_syms']) && preg_match('/[@#$%^&_+~*=-]/', $word))
468 // Contain numbers (e.g. "g00g13")
469 if (!empty($this->options['ignore_nums']) && preg_match('/[0-9]/', $word))
472 // Blocked caps (e.g. "GOOGLE")
473 if (!empty($this->options['ignore_caps']) && $word == mb_strtoupper($word))
476 // Use exceptions from dictionary
477 if (!empty($this->options['dictionary'])) {
480 // @TODO: should dictionary be case-insensitive?
481 if (!empty($this->dict) && in_array($word, $this->dict))
490 * Add a word to dictionary
492 * @param string $word The word to add
494 public function add_word($word)
498 foreach (explode(' ', $word) as $word) {
500 if (strlen($word) < 512) {
501 $this->dict[] = $word;
507 $this->dict = array_unique($this->dict);
508 $this->update_dict();
514 * Remove a word from dictionary
516 * @param string $word The word to remove
518 public function remove_word($word)
522 if (($key = array_search($word, $this->dict)) !== false) {
523 unset($this->dict[$key]);
524 $this->update_dict();
530 * Update dictionary row in DB
532 private function update_dict()
534 if (strcasecmp($this->options['dictionary'], 'shared') != 0) {
535 $userid = (int) $this->rc->user->ID;
538 $plugin = $this->rc->plugins->exec_hook('spell_dictionary_save', array(
539 'userid' => $userid, 'language' => $this->lang, 'dictionary' => $this->dict));
541 if (!empty($plugin['abort'])) {
545 if ($this->have_dict) {
546 if (!empty($this->dict)) {
547 $this->rc->db->query(
548 "UPDATE ".get_table_name('dictionary')
550 ." WHERE user_id " . ($plugin['userid'] ? "= ".$plugin['userid'] : "IS NULL")
551 ." AND " . $this->rc->db->quoteIdentifier('language') . " = ?",
552 implode(' ', $plugin['dictionary']), $plugin['language']);
554 // don't store empty dict
556 $this->rc->db->query(
557 "DELETE FROM " . get_table_name('dictionary')
558 ." WHERE user_id " . ($plugin['userid'] ? "= ".$plugin['userid'] : "IS NULL")
559 ." AND " . $this->rc->db->quoteIdentifier('language') . " = ?",
560 $plugin['language']);
563 else if (!empty($this->dict)) {
564 $this->rc->db->query(
565 "INSERT INTO " .get_table_name('dictionary')
566 ." (user_id, " . $this->rc->db->quoteIdentifier('language') . ", data) VALUES (?, ?, ?)",
567 $plugin['userid'], $plugin['language'], implode(' ', $plugin['dictionary']));
573 * Get dictionary from DB
575 private function load_dict()
577 if (is_array($this->dict)) {
581 if (strcasecmp($this->options['dictionary'], 'shared') != 0) {
582 $userid = (int) $this->rc->user->ID;
585 $plugin = $this->rc->plugins->exec_hook('spell_dictionary_get', array(
586 'userid' => $userid, 'language' => $this->lang, 'dictionary' => array()));
588 if (empty($plugin['abort'])) {
590 $this->rc->db->query(
591 "SELECT data FROM ".get_table_name('dictionary')
592 ." WHERE user_id ". ($plugin['userid'] ? "= ".$plugin['userid'] : "IS NULL")
593 ." AND " . $this->rc->db->quoteIdentifier('language') . " = ?",
594 $plugin['language']);
596 if ($sql_arr = $this->rc->db->fetch_assoc($sql_result)) {
597 $this->have_dict = true;
598 if (!empty($sql_arr['data'])) {
599 $dict = explode(' ', $sql_arr['data']);
603 $plugin['dictionary'] = array_merge((array)$plugin['dictionary'], $dict);
606 if (!empty($plugin['dictionary']) && is_array($plugin['dictionary'])) {
607 $this->dict = $plugin['dictionary'];
610 $this->dict = array();