# Version 1.0
# First public release.
+# hacked by Tim@Rikers.org to handle new URL and layout
+
package babel;
use strict;
+my $no_babel;
+
BEGIN {
- # Translate some feasible abbreviations into the ones babelfish
- # expects.
+ eval "use URI::Escape"; # utility functions for encoding the
+ if ($@) { $no_babel++}; # babelfish request
+ eval "use LWP::UserAgent";
+ if ($@) { $no_babel++};
+}
+
+BEGIN {
+ # Translate some feasible abbreviations into the ones babelfish
+ # expects.
use vars qw!%lang_code $lang_regex!;
%lang_code = (
'fr' => 'fr',
'sp' => 'es',
+ 'es' => 'es',
'po' => 'pt',
'pt' => 'pt',
'it' => 'it',
'ge' => 'de',
'de' => 'de',
'gr' => 'de',
- 'en' => 'en'
+ 'en' => 'en',
+ 'zh' => 'zh',
+ 'ja' => 'ja',
+ 'jp' => 'ja',
+ 'ko' => 'ko',
+ 'kr' => 'ko',
+ 'ru' => 'ru'
);
- # Here's how we recognize the language you're asking for. It looks
- # like RTSL saves you a few keystrokes in #perl, huh?
- $lang_regex = join '|', keys %lang_code;
+ # Here's how we recognize the language you're asking for. It looks
+ # like RTSL saves you a few keystrokes in #perl, huh?
+ $lang_regex = join '|', keys %lang_code;
}
sub babelfish {
- my ($direction, $lang, $phrase) = @_;
-
- return unless &::loadPerlModule("URI::Escape");
- return unless &::loadPerlModule("LWP::UserAgent");
-
- $lang = $lang_code{$lang};
-
- my $ua = new LWP::UserAgent;
- $ua->timeout(10);
- $ua->proxy('http', $::param{'httpProxy'}) if &::IsParam("httpProxy");
-
- my $url = 'http://babelfish.altavista.com/raging/translate.dyn';
- my $req = HTTP::Request->new('POST',$url);
-
- $req->content_type('application/x-www-form-urlencoded');
-
- my $tolang = "en_$lang";
- my $toenglish = "${lang}_en";
-
- if ($direction eq 'to') {
- my $xlate = translate($phrase, $tolang, $req, $ua);
- &::pSReply($xlate) if ($xlate);
- return;
- } elsif ($direction eq 'from') {
- my $xlate = translate($phrase, $toenglish, $req, $ua);
- &::pSReply($xlate) if ($xlate);
- return;
- }
- &::DEBUG("what's this junk?");
-
- my $last_english = $phrase;
- my $last_lang;
- my %results = ();
- my $i = 0;
- while ($i++ < 7) {
- last if $results{$phrase}++; # REMOVE!
- $last_lang = $phrase = translate($phrase, $tolang, $req, $ua);
- last if $results{$phrase}++; # REMOVE!
- $last_english = $phrase = translate($phrase, $toenglish, $req, $ua);
- }
-
- &::pSReply($last_english);
+ return '' if $no_babel;
+ my ($from, $to, $phrase) = @_;
+ &main::DEBUG("babelfish($from, $to, $phrase)");
+
+ $from = $lang_code{$from};
+ $to = $lang_code{$to};
+
+ my $ua = new LWP::UserAgent;
+ $ua->proxy('http', $::param{'httpProxy'}) if (&::IsParam("httpProxy"));
+ # Let's pretend
+ $ua->agent("Mozilla/4.5 " . $ua->agent);
+ $ua->timeout(5);
+
+ my $req =
+ #HTTP::Request->new('POST', 'http://babelfish.altavista.com/raging/translate.dyn');
+ HTTP::Request->new('POST', 'http://babelfish.altavista.com/babelfish/tr');
+
+# babelfish ignored this, but it SHOULD work
+# Accept-Charset: iso-8859-1
+# $req->header('Accept-Charset' => 'iso-8859-1');
+# print $req->header('Accept-Charset');
+ $req->content_type('application/x-www-form-urlencoded');
+
+ return translate($phrase, "${from}_${to}", $req, $ua);
}
sub translate {
- my ($phrase, $languagepair, $req, $ua) = @_;
-
- my $urltext = URI::Escape::uri_escape($phrase);
- $req->content("urltext=$urltext&lp=$languagepair&doit=done");
-
- my $res = $ua->request($req);
-
- my $translated;
- if ($res->is_success) { # success.
- my $html = $res->content;
- my $textarea = 0;
- foreach (split "\n", $html) {
- $textarea = 1 if (/<textarea/i);
- next unless ($textarea);
-
- &::DEBUG(" '$_'");
-
- $textarea = 0 if (/<\/textarea/i);
- }
-
- $html =~ s/\cM//g;
- $html =~ s/\n\s*\n/\n/g;
- $html =~ s/\n/ /g; # ...
-
- if ($html =~ /<textarea.*?>(.*?)<\/textarea/si) {
- $translated = $1;
- $translated =~ s/^[\n ]|[\n ]$//g;
- } else {
- &::WARN("failed regex for babelfish.");
- }
+ return '' if $no_babel;
+ my ($phrase, $languagepair, $req, $ua) = @_;
+ &main::DEBUG("translate($phrase, $languagepair, $req, $ua)");
+
+ my $urltext = uri_escape($phrase);
+ $req->content("urltext=$urltext&lp=$languagepair");
+ &main::DEBUG("http://babelfish.altavista.com/babelfish/tr??urltext=$urltext&lp=$languagepair");
+
+ my $res = $ua->request($req);
+ my $translated;
+
+ if ($res->is_success) {
+ my $html = $res->content;
+ # This method subject to change with the whims of Altavista's design
+ # staff.
+ ($translated) = $html;
+
+ $translated =~ s/<[^>]*>//sg;
+ $translated =~ s/ / /sg;
+ $translated =~ s/\s+/ /sg;
+ #&main::DEBUG("$translated\n===remove <attributes>\n");
+
+ $translated =~ s/\s*Translate again.*//i;
+ &main::DEBUG("$translated\n===remove after 'Translate again'\n");
+
+ $translated =~ s/[^:]*?:\s*(Help\s*)?//s;
+ &main::DEBUG("$translated\n===remove to first ':', optional Help\n");
+
+ $translated =~ s/\n/ /g;
+ # FIXME: should we do unicode->iso (no. use utf8!)
+ } else {
+ $translated = ":("; # failure
+ }
+ &main::pSReply($translated);
+}
- } else { # failure
- $translated = "FAILURE w/ babelfish";
+if (0) {
+ if (-t STDIN) {
+ #my $result = babel::babelfish('en','sp','hello world');
+ #my $result = babel::babelfish('en','sp','The cheese is old and moldy, where is the bathroom?');
+ my $result = babel::babelfish('en','gr','doesn\'t seem to translate things longer than 40 characters');
+ $result =~ s/; /\n/g;
+ print "Babelfish says: \"$result\"\n";
}
-
- $translated ||= "NULL reply from babelfish.";
-
- return $translated;
}
1;