<?php

/*
 +-----------------------------------------------------------------------+
 | program/include/rcube_spellchecker.php                                |
 | This file is part of the Roundcube Webmail client                     |
 | Copyright (C) 2011, Kolab Systems AG                                  |
 | Copyright (C) 2008-2011, The Roundcube Dev Team                       |
 | Licensed under the GNU GPL                                            |
 | Spellchecking using different backends                                |
 +-----------------------------------------------------------------------+
 | Author: Aleksander Machniak <machniak@kolabsys.com>                   |
 | Author: Thomas Bruederli <roundcube@gmail.com>                        |
 +-----------------------------------------------------------------------+

 $Id: rcube_spellchecker.php 4817 2011-05-30 17:08:47Z alec $

*/
/**
 * Helper class for spellchecking with Googiespell and PSpell support.
 *
 * The backend is selected by the 'spellcheck_engine' config option:
 * 'pspell' uses the PHP Pspell extension, any other value (default
 * 'googie') posts the text to a Googiespell-compatible HTTP service.
 */
class rcube_spellchecker
{
    // Result of the last check(). Every item is a list where
    //   [0] is the word (pspell) or the whole matched <c> tag (googie),
    //   [1] is the offset, [2] the length,
    //   [3] is null (pspell) or the "s" attribute (googie),
    //   [4] holds suggestions: an array (pspell) or a tab-separated string (googie).
    private $matches = array();
    private $engine;
    private $lang;
    private $rc;
    private $error;
    // Text given to the last check() call (after optional HTML-to-text conversion)
    private $content;
    // Pspell dictionary handle, created on demand by _pspell_init()
    private $plink;
    // Pattern used to split text into words for Pspell
    private $separator = '/[ !"#$%&()*+\\,\/\n:;<=>?@\[\]^_{|}-]+|\.[^\w]/';


    // default settings
    const GOOGLE_HOST = 'ssl://www.google.com';
    const GOOGLE_PORT = 443;
    const MAX_SUGGESTIONS = 10;


    /**
     * Constructor
     *
     * Reads the engine name from the application config and raises an
     * error when 'pspell' is requested but the extension is not loaded.
     *
     * @param string $lang Language code ('en' is used when empty)
     */
    public function __construct($lang = 'en')
    {
        $this->rc     = rcmail::get_instance();
        $this->engine = $this->rc->config->get('spellcheck_engine', 'googie');
        $this->lang   = $lang ? $lang : 'en';

        if ($this->engine == 'pspell' && !extension_loaded('pspell')) {
            raise_error(array(
                'code' => 500, 'type' => 'php',
                'file' => __FILE__, 'line' => __LINE__,
                'message' => "Pspell extension not available"), true, true);
        }
    }


    /**
     * Set content and check spelling
     *
     * @param string $text    Text content for spellchecking
     * @param bool   $is_html Enables HTML-to-Text conversion
     *
     * @return bool True when no misspelling found, otherwise false
     */
    public function check($text, $is_html = false)
    {
        // convert to plain text
        $this->content = $is_html ? $this->html2text($text) : $text;

        if ($this->engine == 'pspell') {
            $this->matches = $this->_pspell_check($this->content);
        }
        else {
            $this->matches = $this->_googie_check($this->content);
        }

        return $this->found() === 0;
    }


    /**
     * Number of misspellings found (after check)
     *
     * @return int Number of misspellings
     */
    public function found()
    {
        return count($this->matches);
    }


    /**
     * Returns suggestions for the specified word
     *
     * @param string $word The word
     *
     * @return array Suggestions list
     */
    public function get_suggestions($word)
    {
        if ($this->engine == 'pspell') {
            return $this->_pspell_suggestions($word);
        }

        return $this->_googie_suggestions($word);
    }


    /**
     * Returns misspelled words
     *
     * @param string $text    The content for spellchecking. If empty, content
     *                        used for check() method will be used.
     * @param bool   $is_html Enables HTML-to-Text conversion of $text
     *
     * @return array List of misspelled words
     */
    public function get_words($text = null, $is_html = false)
    {
        if ($this->engine == 'pspell') {
            return $this->_pspell_words($text, $is_html);
        }

        return $this->_googie_words($text, $is_html);
    }


    /**
     * Returns checking result in XML (Googiespell) format
     *
     * @return string XML content
     */
    public function get_xml()
    {
        // send output
        $out = '<?xml version="1.0" encoding="'.RCMAIL_CHARSET.'"?><spellresult charschecked="'.mb_strlen((string) $this->content).'">';

        foreach ($this->matches as $item) {
            $out .= '<c o="'.$item[1].'" l="'.$item[2].'">';
            $out .= is_array($item[4]) ? implode("\t", $item[4]) : $item[4];
            $out .= '</c>';
        }

        $out .= '</spellresult>';

        return $out;
    }


    /**
     * Returns checking result (misspelled words with suggestions)
     *
     * @return array Spellchecking result. An array indexed by word,
     *               values are tab-separated suggestion strings.
     */
    public function get()
    {
        $result = array();

        foreach ($this->matches as $item) {
            if ($this->engine == 'pspell') {
                $word = $item[0];
            }
            else {
                // googie matches carry only offset/length, cut the word out of the content
                $word = mb_substr($this->content, $item[1], $item[2], RCMAIL_CHARSET);
            }

            $result[$word] = is_array($item[4]) ? implode("\t", $item[4]) : $item[4];
        }

        return $result;
    }


    /**
     * Returns error message
     *
     * @return string Error message (null when no error was recorded)
     */
    public function error()
    {
        return $this->error;
    }


    /**
     * Checks the text using pspell
     *
     * @param string $text Text content for spellchecking
     *
     * @return array List of matches: array(word, offset, length, null, suggestions)
     */
    private function _pspell_check($text)
    {
        // init spellchecker
        $this->_pspell_init();

        if (!$this->plink) {
            return array();
        }

        // tokenize
        $tokens = preg_split($this->separator, $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);

        if (!is_array($tokens)) {
            return array();
        }

        $diff    = 0;
        $matches = array();

        foreach ($tokens as $w) {
            $word = trim($w[0]);
            // preg_split reports byte offsets; $diff holds the surplus bytes of
            // the multibyte words seen so far and is subtracted to compensate
            $pos  = $w[1] - $diff;
            $len  = mb_strlen($word);

            if ($word && preg_match('/[^0-9\.]/', $word) && !pspell_check($this->plink, $word)) {
                $suggestions = $this->_limit_suggestions(pspell_suggest($this->plink, $word));
                $matches[]   = array($word, $pos, $len, null, $suggestions);
            }

            $diff += (strlen($word) - $len);
        }

        return $matches;
    }


    /**
     * Returns the misspelled words
     *
     * @param string $text    Text to check; when empty the result of the
     *                        last check() is used
     * @param bool   $is_html Enables HTML-to-Text conversion of $text
     *
     * @return array List of misspelled words
     */
    private function _pspell_words($text = null, $is_html = false)
    {
        $result = array();

        if ($text) {
            // init spellchecker
            $this->_pspell_init();

            if (!$this->plink) {
                return array();
            }

            // With PSpell we don't need to get suggestions to return misspelled words
            if ($is_html) {
                $text = $this->html2text($text);
            }

            $tokens = preg_split($this->separator, $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);

            if (!is_array($tokens)) {
                return array();
            }

            foreach ($tokens as $w) {
                $word = trim($w[0]);
                if ($word && preg_match('/[^0-9\.]/', $word) && !pspell_check($this->plink, $word)) {
                    $result[] = $word;
                }
            }

            return $result;
        }

        foreach ($this->matches as $m) {
            $result[] = $m[0];
        }

        return $result;
    }


    /**
     * Returns suggestions for misspelled word
     *
     * @param string $word The word
     *
     * @return array Suggestions list (at most MAX_SUGGESTIONS items)
     */
    private function _pspell_suggestions($word)
    {
        // init spellchecker
        $this->_pspell_init();

        if (!$this->plink) {
            return array();
        }

        return $this->_limit_suggestions(pspell_suggest($this->plink, $word));
    }


    /**
     * Initializes PSpell dictionary
     *
     * Sets the error message when the dictionary cannot be loaded.
     */
    private function _pspell_init()
    {
        if (!$this->plink) {
            $this->plink = pspell_new($this->lang, null, null, RCMAIL_CHARSET, PSPELL_FAST);
        }

        if (!$this->plink) {
            $this->error = "Unable to load Pspell engine for selected language";
        }
    }


    /**
     * Checks the text using a Googiespell-compatible HTTP service
     *
     * Uses the 'spellcheck_uri' config option (language code is appended
     * to it) or the default Google host when the option is empty.
     *
     * @param string $text Text content for spellchecking
     *
     * @return array Matches as returned by preg_match_all() in set order:
     *               array(tag, offset, length, s-attribute, suggestions)
     */
    private function _googie_check($text)
    {
        // spell check uri is configured
        $url  = $this->rc->config->get('spellcheck_uri');
        // the language code ends up in the request line, keep it URL-safe
        $lang = rawurlencode($this->lang);

        if ($url) {
            $a_uri = parse_url($url);
            if (!is_array($a_uri)) {
                $a_uri = array();
            }

            $scheme = isset($a_uri['scheme']) ? $a_uri['scheme'] : '';
            $ssl    = ($scheme == 'https' || $scheme == 'ssl');
            $port   = !empty($a_uri['port']) ? $a_uri['port'] : ($ssl ? 443 : 80);
            $host   = ($ssl ? 'ssl://' : '') . (isset($a_uri['host']) ? $a_uri['host'] : '');
            $path   = (isset($a_uri['path']) ? $a_uri['path'] : '')
                . (!empty($a_uri['query']) ? '?'.$a_uri['query'] : '')
                . $lang;
        }
        else {
            $host = self::GOOGLE_HOST;
            $port = self::GOOGLE_PORT;
            $path = '/tbproxy/spell?lang=' . $lang;
        }

        // Google has some problem with spaces, use \n instead
        $text = str_replace(' ', "\n", $text);

        // the text becomes XML character data, so markup characters must be escaped
        $text = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $text);

        $text = '<?xml version="1.0" encoding="utf-8" ?>'
            .'<spellrequest textalreadyclipped="0" ignoredups="0" ignoredigits="1" ignoreallcaps="1">'
            .'<text>' . $text . '</text>'
            .'</spellrequest>';

        $store = '';
        if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) {
            $out = "POST $path HTTP/1.0\r\n";
            $out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n";
            $out .= "Content-Length: " . strlen($text) . "\r\n";
            $out .= "Content-Type: application/x-www-form-urlencoded\r\n";
            $out .= "Connection: Close\r\n\r\n";
            $out .= $text;
            fwrite($fp, $out);

            while (!feof($fp)) {
                $store .= fgets($fp, 128);
            }
            fclose($fp);
        }

        if (!$store) {
            $this->error = "Empty result from spelling engine";
        }

        // the response headers are not stripped, the pattern only picks the <c> tags
        preg_match_all('/<c o="([^"]*)" l="([^"]*)" s="([^"]*)">([^<]*)<\/c>/', $store, $matches, PREG_SET_ORDER);

        return $matches;
    }


    /**
     * Returns the misspelled words (Googiespell engine)
     *
     * @param string $text    Text to check; when empty the result of the
     *                        last check() is used
     * @param bool   $is_html Enables HTML-to-Text conversion of $text
     *
     * @return array List of misspelled words
     */
    private function _googie_words($text = null, $is_html = false)
    {
        if ($text) {
            if ($is_html) {
                $text = $this->html2text($text);
            }

            $matches = $this->_googie_check($text);
        }
        else {
            $matches = $this->matches;
            $text    = $this->content;
        }

        $result = array();

        foreach ($matches as $m) {
            $result[] = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET);
        }

        return $result;
    }


    /**
     * Returns suggestions for misspelled word (Googiespell engine)
     *
     * @param string $word The word; when empty the first match of the
     *                     last check() is used
     *
     * @return array Suggestions list (at most MAX_SUGGESTIONS items)
     */
    private function _googie_suggestions($word)
    {
        $matches = $word ? $this->_googie_check($word) : $this->matches;

        if (empty($matches[0][4])) {
            return array();
        }

        return $this->_limit_suggestions(explode("\t", $matches[0][4]));
    }


    /**
     * Caps a suggestions list at MAX_SUGGESTIONS items
     *
     * @param mixed $suggestions Suggestions array, or a non-array value
     *                           (e.g. false from a failed lookup)
     *
     * @return array Suggestions list, empty for non-array input
     */
    private function _limit_suggestions($suggestions)
    {
        if (!is_array($suggestions)) {
            return array();
        }

        if (count($suggestions) > self::MAX_SUGGESTIONS) {
            $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
        }

        return $suggestions;
    }


    /**
     * Converts HTML content to plain text
     *
     * @param string $text HTML content
     *
     * @return string Plain text produced by the html2text converter
     */
    private function html2text($text)
    {
        $h2t = new html2text($text, false, true, 0);
        return $h2t->get_text();
    }
}