1 # This program is copyright Jonathan Feinberg 1999.
2 # This program is distributed under the same terms as infobot.
6 # http://pobox.com/~jdf/
9 # First public release.
11 # hacked by Tim@Rikers.org to handle new URL and layout
17 #my $url = 'http://babelfish.av.com/tr';
18 my $url = 'http://babelfish.yahoo.com/translate_txt';
21 eval "use URI::Escape"; # utility functions for encoding the
22 if ($@) { $no_babelfish++ }
24 eval "use LWP::UserAgent";
25 if ($@) { $no_babelfish++ }
30 # Translate some feasible abbreviations into the ones babelfish
32 use vars qw!%lang_code $lang_regex!;
55 # Here's how we recognize the language you're asking for. It looks
56 # like RTSL saves you a few keystrokes in #perl, huh?
57 $lang_regex = join '|', keys %lang_code;
61 return '' if $no_babelfish;
62 my ( $from, $to, $phrase ) = @_;
63 &::DEBUG("babelfish($from, $to, $phrase)");
65 $from = $lang_code{$from};
66 $to = $lang_code{$to};
68 my $ua = new LWP::UserAgent;
69 $ua->proxy( 'http', $::param{'httpProxy'} ) if ( &::IsParam('httpProxy') );
72 $ua->agent( "Mozilla/5.0 " . $ua->agent );
75 my $req = HTTP::Request->new( 'POST', $url );
77 $req->header('Accept-Language' => 'en-us');
78 $req->header('Accept-Charset' => 'UTF-8,*');
79 $req->content_type('application/x-www-form-urlencoded');
81 return translate( $phrase, "${from}_${to}", $req, $ua );
85 return '' if $no_babelfish;
86 my ( $phrase, $languagepair, $req, $ua ) = @_;
87 &::DEBUG("translate($phrase, $languagepair, $req, $ua)");
89 my $trtext = uri_escape($phrase);
90 $req->content("trtext=$trtext&lp=$languagepair");
91 &::DEBUG("$url??trtext=$trtext&lp=$languagepair");
93 my $res = $ua->request($req);
96 if ( $res->is_success ) {
97 my $html = $res->content;
99 # This method subject to change with the whims of Babelfish design staff.
100 ($translated) = $html;
102 $translated =~ s/.*<\/head>//sg;
103 # clean before doc-body
104 $translated =~ s/.*<div id="doc-body"[^>]*>//sg;
105 # clean after first form
106 $translated =~ s/<\/form>.*//sg;
107 # convert back to spaces
108 $translated =~ s/ / /sg;
109 &::DEBUG("================================\n$translated\n========================\n");
111 $translated =~ s/.*<div id="result">//sg;
113 $translated =~ s/<\/div.*//sg;
115 $translated =~ s/<[^>]*>/ /sg;
117 $translated =~ s/[\n\r\t]/ /g;
118 # strip leading whitespace
119 $translated =~ s/^\s+//sg;
120 # strip trailing whitespace
121 $translated =~ s/\s+$//sg;
122 # strip multiple whitespace
123 $translated =~ s/\s+/ /sg;
125 # FIXME: any entities to utf8?
128 $translated = ":("; # failure
130 $translated = "babelfish.pl: result too long, probably an error"
131 if ( length($translated) > 700 );
138 my $babel_lang_regex =
139 "de|ge|gr|el|sp|es|en|fr|it|ja|jp|ko|kr|nl|po|pt|ru|zh|zt";
142 ($babel_lang_regex)\w* # from language?
144 ($babel_lang_regex)\w* # to language?
146 (.+) # The phrase to be translated
150 &::performStrictReply( &babelfishParam( lc $1, lc $2, lc $3 ) );
157 # vim:ts=4:sw=4:expandtab:tw=80