1 # This program is copyright Jonathan Feinberg 1999.
2 # This program is distributed under the same terms as infobot.
6 # http://pobox.com/~jdf/
9 # First public release.
11 # hacked by Tim@Rikers.org to handle new URL and layout
19 eval "use URI::Escape"; # utility functions for encoding the
20 if ($@) { $no_babel++}; # babelfish request
21 eval "use LWP::UserAgent";
22 if ($@) { $no_babel++};
26 # Translate some feasible abbreviations into the ones babelfish
28 use vars qw!%lang_code $lang_regex!;
48 # Here's how we recognize the language you're asking for. It looks
49 # like RTSL saves you a few keystrokes in #perl, huh?
50 $lang_regex = join '|', keys %lang_code;
54 return '' if $no_babel;
55 my ($from, $to, $phrase) = @_;
56 #&main::DEBUG("babelfish($from, $to, $phrase)");
58 $from = $lang_code{$from};
59 $to = $lang_code{$to};
61 my $ua = new LWP::UserAgent;
65 HTTP::Request->new('POST', 'http://babelfish.altavista.com/raging/translate.dyn');
67 # babelfish ignored this, but it SHOULD work
68 # Accept-Charset: iso-8859-1
69 # $req->header('Accept-Charset' => 'iso-8859-1');
70 # print $req->header('Accept-Charset');
71 $req->content_type('application/x-www-form-urlencoded');
73 return translate($phrase, "${from}_${to}", $req, $ua);
77 return '' if $no_babel;
78 my ($phrase, $languagepair, $req, $ua) = @_;
79 #&main::DEBUG("translate($phrase, $languagepair, $req, $ua)");
81 my $urltext = uri_escape($phrase);
82 $req->content("urltext=$urltext&lp=$languagepair");
83 #&main::DEBUG("http://babelfish.altavista.com/raging/translate.dyn??urltext=$urltext&lp=$languagepair");
85 my $res = $ua->request($req);
88 if ($res->is_success) {
89 my $html = $res->content;
90 # This method subject to change with the whims of Altavista's design
93 #&main::DEBUG("$html\n===============\n");
94 # look for the first :< which should be the "To English:<", etc.
95 # strip any trailing tags, grab text that follows up to the next tag.
96 #my ($translated) = ($html =~ m{:\s*(<[^>]*>\s*)+([^<]*)}sx);
97 ($translated) = $html;
98 #(undef, $translated) = ($html =~ m{(:\s+(<[^>]*>\s*)+)([^<\s]*)<}sx);
100 # Tim@Rikers.org get's frustrated and splits this into steps:
101 # 1) remove everything up to the first ':' in the text
102 $translated =~ s/.*?:\s*</</s;
103 # 2) remove any <attributes> till the first text
104 $translated =~ s/(<[^>]*>\s*)*//s;
105 # 3) remove the first trailing <attribute> and everything after it
106 $translated =~ s/<.*//s;
108 # look for contents of first textarea - not anymore cause > 40 char does not get one.
109 #my ($translated) = ($html =~ m{<textarea[^>]*>+([^<]*)}sx);
110 #&main::DEBUG("\"$translated\"\n===============\n");
111 # ($html =~ m{<textarea[^>]*>
117 # <font\ face="arial,\ helvetica">
119 # (?:\*\*\s+time\ out\s+\*\*)?
123 $translated =~ s/\n/ /g;
124 $translated =~ s/\s*$//;
125 # need a way to do unicode->iso
127 $translated = ":("; # failure
129 &main::pSReply($translated);
134 #my $result = babel::babelfish('en','sp','hello world');
135 #my $result = babel::babelfish('en','sp','The cheese is old and moldy, where is the bathroom?');
136 my $result = babel::babelfish('en','gr','doesn\'t seem to translate things longer than 40 characters');
137 $result =~ s/; /\n/g;
138 print "Babelfish says: \"$result\"\n";