X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2FModules%2Fbabel.pl;h=56dc42797dc50da2fd18d0a80888fbb50abb73b8;hb=f7cae48a17d6decd0a9bd997188271daa0a885b1;hp=4b1a957b597ddfa9323727c74a371040b7067e4e;hpb=5265c2ed8e78478a2ce7429786eded93806b555f;p=infobot.git diff --git a/src/Modules/babel.pl b/src/Modules/babel.pl index 4b1a957..56dc427 100644 --- a/src/Modules/babel.pl +++ b/src/Modules/babel.pl @@ -27,23 +27,23 @@ BEGIN { # expects. use vars qw!%lang_code $lang_regex!; %lang_code = ( - 'fr' => 'fr', - 'sp' => 'es', - 'es' => 'es', - 'po' => 'pt', - 'pt' => 'pt', - 'it' => 'it', - 'ge' => 'de', - 'de' => 'de', - 'gr' => 'de', - 'en' => 'en', - 'zh' => 'zh', - 'ja' => 'ja', - 'jp' => 'ja', - 'ko' => 'ko', - 'kr' => 'ko', - 'ru' => 'ru' - ); + 'fr' => 'fr', + 'sp' => 'es', + 'es' => 'es', + 'po' => 'pt', + 'pt' => 'pt', + 'it' => 'it', + 'ge' => 'de', + 'de' => 'de', + 'gr' => 'de', + 'en' => 'en', + 'zh' => 'zh', + 'ja' => 'ja', + 'jp' => 'ja', + 'ko' => 'ko', + 'kr' => 'ko', + 'ru' => 'ru' + ); # Here's how we recognize the language you're asking for. It looks # like RTSL saves you a few keystrokes in #perl, huh? @@ -53,16 +53,20 @@ BEGIN { sub babelfish { return '' if $no_babel; my ($from, $to, $phrase) = @_; - #&main::DEBUG("babelfish($from, $to, $phrase)"); + &main::DEBUG("babelfish($from, $to, $phrase)"); $from = $lang_code{$from}; $to = $lang_code{$to}; my $ua = new LWP::UserAgent; + $ua->proxy('http', $::param{'httpProxy'}) if (&::IsParam("httpProxy")); + # Let's pretend + $ua->agent("Mozilla/4.5 " . $ua->agent); $ua->timeout(5); my $req = - HTTP::Request->new('POST', 'http://babelfish.altavista.com/raging/translate.dyn'); + #HTTP::Request->new('POST', 'http://babelfish.altavista.com/raging/translate.dyn'); + HTTP::Request->new('POST', 'http://babelfish.altavista.com/babelfish/tr'); # babelfish ignored this, but it SHOULD work # Accept-Charset: iso-8859-1 @@ -76,11 +80,11 @@ sub babelfish { sub translate { return '' if $no_babel; my ($phrase, $languagepair, $req, $ua) = @_; - #&main::DEBUG("translate($phrase, $languagepair, $req, $ua)"); + &main::DEBUG("translate($phrase, $languagepair, $req, $ua)"); my $urltext = uri_escape($phrase); $req->content("urltext=$urltext&lp=$languagepair"); - #&main::DEBUG("http://babelfish.altavista.com/raging/translate.dyn??urltext=$urltext&lp=$languagepair"); + &main::DEBUG("http://babelfish.altavista.com/babelfish/tr??urltext=$urltext&lp=$languagepair"); my $res = $ua->request($req); my $translated; @@ -89,40 +93,21 @@ sub translate { my $html = $res->content; # This method subject to change with the whims of Altavista's design # staff. - $html =~ s/\s+/ /sg; - #&main::DEBUG("$html\n===============\n"); - # look for the first :< which should be the "To English:<", etc. - # strip any trailing tags, grab text that follows up to the next tag. - #my ($translated) = ($html =~ m{:\s*(<[^>]*>\s*)+([^<]*)}sx); ($translated) = $html; - #(undef, $translated) = ($html =~ m{(:\s+(<[^>]*>\s*)+)([^<\s]*)<}sx); - - # Tim@Rikers.org get's frustrated and splits this into steps: - # 1) remove everything up to the first ':' in the text - $translated =~ s/.*?:\s* till the first text - $translated =~ s/(<[^>]*>\s*)*//s; - # 3) remove the first trailing and everything after it - $translated =~ s/<.*//s; - - # look for contents of first textarea - not anymore cause > 40 char does not get one. - #my ($translated) = ($html =~ m{]*>+([^<]*)}sx); - #&main::DEBUG("\"$translated\"\n===============\n"); -# ($html =~ m{]*> -# \s* -# ([^<]*) -# }sx); -# ($html =~ m{
-# \s+ -# -# \s* -# (?:\*\*\s+time\ out\s+\*\*)? -# \s* -# ([^<]*) -# }sx); + + $translated =~ s/<[^>]*>//sg; + $translated =~ s/ / /sg; + $translated =~ s/\s+/ /sg; + #&main::DEBUG("$translated\n===remove \n"); + + $translated =~ s/\s*Translate again.*//i; + &main::DEBUG("$translated\n===remove after 'Translate again'\n"); + + $translated =~ s/[^:]*?:\s*(Help\s*)?//s; + &main::DEBUG("$translated\n===remove to first ':', optional Help\n"); + $translated =~ s/\n/ /g; - $translated =~ s/\s*$//; - # need a way to do unicode->iso + # FIXME should we do unicode->iso } else { $translated = ":("; # failure } @@ -131,11 +116,11 @@ sub translate { if (0) { if (-t STDIN) { - #my $result = babel::babelfish('en','sp','hello world'); - #my $result = babel::babelfish('en','sp','The cheese is old and moldy, where is the bathroom?'); - my $result = babel::babelfish('en','gr','doesn\'t seem to translate things longer than 40 characters'); - $result =~ s/; /\n/g; - print "Babelfish says: \"$result\"\n"; + #my $result = babel::babelfish('en','sp','hello world'); + #my $result = babel::babelfish('en','sp','The cheese is old and moldy, where is the bathroom?'); + my $result = babel::babelfish('en','gr','doesn\'t seem to translate things longer than 40 characters'); + $result =~ s/; /\n/g; + print "Babelfish says: \"$result\"\n"; } }