From: timriker Date: Thu, 18 Nov 2004 07:56:30 +0000 (+0000) Subject: babelfish cleanup X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=79133680ae1c61b5d56ff46c500a64f7fb35d1f0;p=infobot.git babelfish cleanup git-svn-id: https://svn.code.sf.net/p/infobot/code/trunk/blootbot@1039 c11ca15a-4712-0410-83d8-924469b57eb5 --- diff --git a/src/CommandStubs.pl b/src/CommandStubs.pl index 09cd884..edd2941 100644 --- a/src/CommandStubs.pl +++ b/src/CommandStubs.pl @@ -277,7 +277,7 @@ sub Modules { return; } - # babel bot: Jonathan Feinberg++ + # babelfish bot: Jonathan Feinberg++ if ($message =~ m{ ^\s* (?:babel(?:fish)?|x|xlate|translate) @@ -290,7 +290,7 @@ sub Modules { }xoi) { return unless (&hasParam("babelfish")); - &Forker("babelfish", sub { &babel::babelfish(lc $1, lc $2, $3); } ); + &Forker("babelfish", sub { &babelfish::babelfish(lc $1, lc $2, $3); } ); $cmdstats{'BabelFish'}++; return; diff --git a/src/Modules/babel.pl b/src/Modules/babel.pl deleted file mode 100644 index 16beeb5..0000000 --- a/src/Modules/babel.pl +++ /dev/null @@ -1,133 +0,0 @@ -# This program is copyright Jonathan Feinberg 1999. -# This program is distributed under the same terms as infobot. - -# Jonathan Feinberg -# jdf@pobox.com -# http://pobox.com/~jdf/ - -# Version 1.0 -# First public release. - -# hacked by Tim@Rikers.org to handle new URL and layout - -package babel; -use strict; - -my $no_babel; - -BEGIN { - eval "use URI::Escape"; # utility functions for encoding the - if ($@) { $no_babel++}; # babelfish request - eval "use LWP::UserAgent"; - if ($@) { $no_babel++}; -} - -BEGIN { - # Translate some feasible abbreviations into the ones babelfish - # expects. - use vars qw!%lang_code $lang_regex!; - %lang_code = ( - 'de' => 'de', - 'ge' => 'de', - 'gr' => 'el', - 'el' => 'el', - 'sp' => 'es', - 'es' => 'es', - 'en' => 'en', - 'fr' => 'fr', - 'it' => 'it', - 'ja' => 'ja', - 'jp' => 'ja', - 'ko' => 'ko', - 'kr' => 'ko', - 'nl' => 'nl', - 'po' => 'pt', - 'pt' => 'pt', - 'ru' => 'ru', - 'zh' => 'zh', - 'zt' => 'zt' - ); - - # Here's how we recognize the language you're asking for. It looks - # like RTSL saves you a few keystrokes in #perl, huh? - $lang_regex = join '|', keys %lang_code; -} - -sub babelfish { - return '' if $no_babel; - my ($from, $to, $phrase) = @_; - &main::DEBUG("babelfish($from, $to, $phrase)"); - - $from = $lang_code{$from}; - $to = $lang_code{$to}; - - my $ua = new LWP::UserAgent; - $ua->proxy('http', $::param{'httpProxy'}) if (&::IsParam("httpProxy")); - # Let's pretend - $ua->agent("Mozilla/5.0 " . $ua->agent); - $ua->timeout(5); - - my $req = - #HTTP::Request->new('POST', 'http://babelfish.altavista.com/raging/translate.dyn'); - HTTP::Request->new('POST', 'http://babelfish.altavista.com/babelfish/tr'); - -# babelfish ignored this, but it SHOULD work -# Accept-Charset: iso-8859-1 -# $req->header('Accept-Charset' => 'iso-8859-1'); -# print $req->header('Accept-Charset'); - $req->header('Accept-Language' => 'en'); - $req->content_type('application/x-www-form-urlencoded'); - - return translate($phrase, "${from}_${to}", $req, $ua); -} - -sub translate { - return '' if $no_babel; - my ($phrase, $languagepair, $req, $ua) = @_; - &main::DEBUG("translate($phrase, $languagepair, $req, $ua)"); - - my $urltext = uri_escape($phrase); - $req->content("urltext=$urltext&lp=$languagepair"); - &main::DEBUG("http://babelfish.altavista.com/babelfish/tr??urltext=$urltext&lp=$languagepair"); - - my $res = $ua->request($req); - my $translated; - - if ($res->is_success) { - my $html = $res->content; - # This method subject to change with the whims of Altavista's design - # staff. - ($translated) = $html; - - $translated =~ s/<[^>]*>//sg; - $translated =~ s/ / /sg; - $translated =~ s/\s+/ /sg; - #&main::DEBUG("$translated\n===remove \n"); - - $translated =~ s/\s*Translate again.*//i; - &main::DEBUG("$translated\n===remove after 'Translate again'\n"); - - $translated =~ s/[^:]*?:\s*(Help\s*)?//s; - &main::DEBUG("len=" . length($translated) . " $translated\n===remove to first ':', optional Help\n"); - - $translated =~ s/\n/ /g; - # FIXME: should we do unicode->iso (no. use utf8!) - } else { - $translated = ":("; # failure - } - $translated = "babel.pl: result too long, probably an error" if (length($translated) > 700); - - &main::pSReply($translated); -} - -if (0) { - if (-t STDIN) { - #my $result = babel::babelfish('en','sp','hello world'); - #my $result = babel::babelfish('en','sp','The cheese is old and moldy, where is the bathroom?'); - my $result = babel::babelfish('en','gr','doesn\'t seem to translate things longer than 40 characters'); - $result =~ s/; /\n/g; - print "Babelfish says: \"$result\"\n"; - } -} - -1; diff --git a/src/Modules/babelfish.pl b/src/Modules/babelfish.pl new file mode 100644 index 0000000..6d3b092 --- /dev/null +++ b/src/Modules/babelfish.pl @@ -0,0 +1,133 @@ +# This program is copyright Jonathan Feinberg 1999. +# This program is distributed under the same terms as infobot. + +# Jonathan Feinberg +# jdf@pobox.com +# http://pobox.com/~jdf/ + +# Version 1.0 +# First public release. + +# hacked by Tim@Rikers.org to handle new URL and layout + +package babelfish; +use strict; + +my $no_babelfish; + +BEGIN { + eval "use URI::Escape"; # utility functions for encoding the + if ($@) { $no_babelfish++}; # babelfish request + eval "use LWP::UserAgent"; + if ($@) { $no_babelfish++}; +} + +BEGIN { + # Translate some feasible abbreviations into the ones babelfish + # expects. + use vars qw!%lang_code $lang_regex!; + %lang_code = ( + 'de' => 'de', + 'ge' => 'de', + 'gr' => 'el', + 'el' => 'el', + 'sp' => 'es', + 'es' => 'es', + 'en' => 'en', + 'fr' => 'fr', + 'it' => 'it', + 'ja' => 'ja', + 'jp' => 'ja', + 'ko' => 'ko', + 'kr' => 'ko', + 'nl' => 'nl', + 'po' => 'pt', + 'pt' => 'pt', + 'ru' => 'ru', + 'zh' => 'zh', + 'zt' => 'zt' + ); + + # Here's how we recognize the language you're asking for. It looks + # like RTSL saves you a few keystrokes in #perl, huh? + $lang_regex = join '|', keys %lang_code; +} + +sub babelfish { + return '' if $no_babelfish; + my ($from, $to, $phrase) = @_; + &main::DEBUG("babelfish($from, $to, $phrase)"); + + $from = $lang_code{$from}; + $to = $lang_code{$to}; + + my $ua = new LWP::UserAgent; + $ua->proxy('http', $::param{'httpProxy'}) if (&::IsParam("httpProxy")); + # Let's pretend + $ua->agent("Mozilla/5.0 " . $ua->agent); + $ua->timeout(5); + + my $req = + #HTTP::Request->new('POST', 'http://babelfish.altavista.com/raging/translate.dyn'); + HTTP::Request->new('POST', 'http://babelfish.altavista.com/babelfish/tr'); + +# babelfish ignored this, but it SHOULD work +# Accept-Charset: iso-8859-1 +# $req->header('Accept-Charset' => 'iso-8859-1'); +# print $req->header('Accept-Charset'); + $req->header('Accept-Language' => 'en'); + $req->content_type('application/x-www-form-urlencoded'); + + return translate($phrase, "${from}_${to}", $req, $ua); +} + +sub translate { + return '' if $no_babelfish; + my ($phrase, $languagepair, $req, $ua) = @_; + &main::DEBUG("translate($phrase, $languagepair, $req, $ua)"); + + my $urltext = uri_escape($phrase); + $req->content("urltext=$urltext&lp=$languagepair"); + &main::DEBUG("http://babelfish.altavista.com/babelfish/tr??urltext=$urltext&lp=$languagepair"); + + my $res = $ua->request($req); + my $translated; + + if ($res->is_success) { + my $html = $res->content; + # This method subject to change with the whims of Altavista's design + # staff. + ($translated) = $html; + + $translated =~ s/<[^>]*>//sg; + $translated =~ s/ / /sg; + $translated =~ s/\s+/ /sg; + #&main::DEBUG("$translated\n===remove \n"); + + $translated =~ s/\s*Translate again.*//i; + &main::DEBUG("$translated\n===remove after 'Translate again'\n"); + + $translated =~ s/[^:]*?:\s*(Help\s*)?//s; + &main::DEBUG("len=" . length($translated) . " $translated\n===remove to first ':', optional Help\n"); + + $translated =~ s/\n/ /g; + # FIXME: should we do unicode->iso (no. use utf8!) + } else { + $translated = ":("; # failure + } + $translated = "babelfish.pl: result too long, probably an error" if (length($translated) > 700); + + &main::pSReply($translated); +} + +if (0) { + if (-t STDIN) { + #my $result = babelfish::babelfish('en','sp','hello world'); + #my $result = babelfish::babelfish('en','sp','The cheese is old and moldy, where is the bathroom?'); + my $result = babelfish::babelfish('en','gr','doesn\'t seem to translate things longer than 40 characters'); + $result =~ s/; /\n/g; + print "Babelfish says: \"$result\"\n"; + } +} + +1; diff --git a/src/modules.pl b/src/modules.pl index be65dce..2070d67 100644 --- a/src/modules.pl +++ b/src/modules.pl @@ -22,7 +22,7 @@ if ($@) { ### MODULES. %myModules = ( - "babelfish" => "babel.pl", + "babelfish" => "babelfish.pl", "botmail" => "botmail.pl", "BZFlag" => "BZFlag.pl", "countdown" => "Countdown.pl",