From 5265c2ed8e78478a2ce7429786eded93806b555f Mon Sep 17 00:00:00 2001 From: timriker Date: Wed, 30 Oct 2002 10:18:35 +0000 Subject: [PATCH] fix babelfish and add new languages git-svn-id: https://svn.code.sf.net/p/infobot/code/trunk/blootbot@566 c11ca15a-4712-0410-83d8-924469b57eb5 --- files/sample/sample.config | 4 +- src/CommandStubs.pl | 6 +- src/Modules/babel.pl | 199 ++++++++++++++++++++----------------- 3 files changed, 115 insertions(+), 94 deletions(-) diff --git a/files/sample/sample.config b/files/sample/sample.config index 25a99da..751d19e 100644 --- a/files/sample/sample.config +++ b/files/sample/sample.config @@ -188,8 +188,8 @@ set backlog 24 # [str] anything which requires LWP + http proxy. ###set httpProxy http://HOSTNAME:PORT/ -# [0/1] babelfish translator. jdf++. FIXME [DOES NOT WORK] -set babelfish false +# [0/1] babelfish translator. jdf++. +set babelfish true # [0/1] offer free factoid cookies set cookie true diff --git a/src/CommandStubs.pl b/src/CommandStubs.pl index b94f4a4..3e76e16 100644 --- a/src/CommandStubs.pl +++ b/src/CommandStubs.pl @@ -5,7 +5,7 @@ #use strict; -$babel_lang_regex = "fr|sp|po|pt|it|ge|de|gr|en"; +$babel_lang_regex = "fr|sp|es|po|pt|it|ge|de|gr|en|zh|ja|jp|ko|kr|ru"; ### COMMAND HOOK IMPLEMENTATION. # addCmdHook("SECTION", 'TEXT_HOOK', @@ -235,9 +235,9 @@ sub Modules { ^\s* (?:babel(?:fish)?|x|xlate|translate) \s+ - (to|from) # direction of translation (through) + ($babel_lang_regex)\w* # from language? \s+ - ($babel_lang_regex)\w* # which language? + ($babel_lang_regex)\w* # to language? \s* (.+) # The phrase to be translated }xoi) { diff --git a/src/Modules/babel.pl b/src/Modules/babel.pl index 0eb50c4..4b1a957 100644 --- a/src/Modules/babel.pl +++ b/src/Modules/babel.pl @@ -8,114 +8,135 @@ # Version 1.0 # First public release. +# hacked by Tim@Rikers.org to handle new URL and layout + package babel; use strict; +my $no_babel; + +BEGIN { + eval "use URI::Escape"; # utility functions for encoding the + if ($@) { $no_babel++}; # babelfish request + eval "use LWP::UserAgent"; + if ($@) { $no_babel++}; +} + BEGIN { - # Translate some feasible abbreviations into the ones babelfish - # expects. + # Translate some feasible abbreviations into the ones babelfish + # expects. use vars qw!%lang_code $lang_regex!; %lang_code = ( - 'fr' => 'fr', - 'sp' => 'es', - 'po' => 'pt', - 'pt' => 'pt', - 'it' => 'it', - 'ge' => 'de', - 'de' => 'de', - 'gr' => 'de', - 'en' => 'en' - ); - - # Here's how we recognize the language you're asking for. It looks - # like RTSL saves you a few keystrokes in #perl, huh? - $lang_regex = join '|', keys %lang_code; + 'fr' => 'fr', + 'sp' => 'es', + 'es' => 'es', + 'po' => 'pt', + 'pt' => 'pt', + 'it' => 'it', + 'ge' => 'de', + 'de' => 'de', + 'gr' => 'de', + 'en' => 'en', + 'zh' => 'zh', + 'ja' => 'ja', + 'jp' => 'ja', + 'ko' => 'ko', + 'kr' => 'ko', + 'ru' => 'ru' + ); + + # Here's how we recognize the language you're asking for. It looks + # like RTSL saves you a few keystrokes in #perl, huh? + $lang_regex = join '|', keys %lang_code; } sub babelfish { - my ($direction, $lang, $phrase) = @_; + return '' if $no_babel; + my ($from, $to, $phrase) = @_; + #&main::DEBUG("babelfish($from, $to, $phrase)"); - return unless &::loadPerlModule("URI::Escape"); - return unless &::loadPerlModule("LWP::UserAgent"); + $from = $lang_code{$from}; + $to = $lang_code{$to}; - $lang = $lang_code{$lang}; + my $ua = new LWP::UserAgent; + $ua->timeout(5); - my $ua = new LWP::UserAgent; - $ua->timeout(10); - $ua->proxy('http', $::param{'httpProxy'}) if &::IsParam("httpProxy"); + my $req = + HTTP::Request->new('POST', 'http://babelfish.altavista.com/raging/translate.dyn'); - my $url = 'http://babelfish.altavista.com/raging/translate.dyn'; - my $req = HTTP::Request->new('POST',$url); - - $req->content_type('application/x-www-form-urlencoded'); - - my $tolang = "en_$lang"; - my $toenglish = "${lang}_en"; - - if ($direction eq 'to') { - my $xlate = translate($phrase, $tolang, $req, $ua); - &::pSReply($xlate) if ($xlate); - return; - } elsif ($direction eq 'from') { - my $xlate = translate($phrase, $toenglish, $req, $ua); - &::pSReply($xlate) if ($xlate); - return; - } - &::DEBUG("what's this junk?"); - - my $last_english = $phrase; - my $last_lang; - my %results = (); - my $i = 0; - while ($i++ < 7) { - last if $results{$phrase}++; # REMOVE! - $last_lang = $phrase = translate($phrase, $tolang, $req, $ua); - last if $results{$phrase}++; # REMOVE! - $last_english = $phrase = translate($phrase, $toenglish, $req, $ua); - } +# babelfish ignored this, but it SHOULD work +# Accept-Charset: iso-8859-1 +# $req->header('Accept-Charset' => 'iso-8859-1'); +# print $req->header('Accept-Charset'); + $req->content_type('application/x-www-form-urlencoded'); - &::pSReply($last_english); + return translate($phrase, "${from}_${to}", $req, $ua); } sub translate { - my ($phrase, $languagepair, $req, $ua) = @_; - - my $urltext = URI::Escape::uri_escape($phrase); - $req->content("urltext=$urltext&lp=$languagepair&doit=done"); - - my $res = $ua->request($req); - - my $translated; - if ($res->is_success) { # success. - my $html = $res->content; - my $textarea = 0; - foreach (split "\n", $html) { - $textarea = 1 if (/