ws

[infobot.git] / src / Modules / babel.pl
diff --git a/src/Modules/babel.pl b/src/Modules/babel.pl

index 4b1a957b597ddfa9323727c74a371040b7067e4e..56dc42797dc50da2fd18d0a80888fbb50abb73b8 100644 (file)
--- a/src/Modules/babel.pl
+++ b/src/Modules/babel.pl
@@ -27,23 +27,23 @@ BEGIN {
    # expects.
      use vars qw!%lang_code $lang_regex!;
      %lang_code = (
-                'fr' => 'fr',
-                'sp' => 'es',
-                'es' => 'es',
-                'po' => 'pt',
-                'pt' => 'pt',
-                'it' => 'it',
-                'ge' => 'de',
-                'de' => 'de',
-                'gr' => 'de',
-                'en' => 'en',
-                'zh' => 'zh',
-                'ja' => 'ja',
-                'jp' => 'ja',
-                'ko' => 'ko',
-                'kr' => 'ko',
-                'ru' => 'ru'
-               );
+               'fr' => 'fr',
+               'sp' => 'es',
+               'es' => 'es',
+               'po' => 'pt',
+               'pt' => 'pt',
+               'it' => 'it',
+               'ge' => 'de',
+               'de' => 'de',
+               'gr' => 'de',
+               'en' => 'en',
+               'zh' => 'zh',
+               'ja' => 'ja',
+               'jp' => 'ja',
+               'ko' => 'ko',
+               'kr' => 'ko',
+               'ru' => 'ru'
+              );
  
    # Here's how we recognize the language you're asking for.  It looks
    # like RTSL saves you a few keystrokes in #perl, huh?
@@ -53,16 +53,20 @@ BEGIN {
  sub babelfish {
      return '' if $no_babel;
    my ($from, $to, $phrase) = @_;
-  #&main::DEBUG("babelfish($from, $to, $phrase)");
+  &main::DEBUG("babelfish($from, $to, $phrase)");
  
    $from = $lang_code{$from};
    $to = $lang_code{$to};
  
    my $ua = new LWP::UserAgent;
+  $ua->proxy('http', $::param{'httpProxy'}) if (&::IsParam("httpProxy"));
+  # Let's pretend
+  $ua->agent("Mozilla/4.5 " . $ua->agent);
    $ua->timeout(5);
  
    my $req =
-    HTTP::Request->new('POST', 'http://babelfish.altavista.com/raging/translate.dyn');
+    #HTTP::Request->new('POST', 'http://babelfish.altavista.com/raging/translate.dyn');
+    HTTP::Request->new('POST', 'http://babelfish.altavista.com/babelfish/tr');
  
  # babelfish ignored this, but it SHOULD work
  # Accept-Charset: iso-8859-1
@@ -76,11 +80,11 @@ sub babelfish {
  sub translate {
      return '' if $no_babel;
    my ($phrase, $languagepair, $req, $ua) = @_;
-  #&main::DEBUG("translate($phrase, $languagepair, $req, $ua)");
+  &main::DEBUG("translate($phrase, $languagepair, $req, $ua)");
  
    my $urltext = uri_escape($phrase);
    $req->content("urltext=$urltext&lp=$languagepair");
-  #&main::DEBUG("http://babelfish.altavista.com/raging/translate.dyn??urltext=$urltext&lp=$languagepair");
+  &main::DEBUG("http://babelfish.altavista.com/babelfish/tr??urltext=$urltext&lp=$languagepair");
  
    my $res = $ua->request($req);
    my $translated;
@@ -89,40 +93,21 @@ sub translate {
        my $html = $res->content;
        # This method subject to change with the whims of Altavista's design
        # staff.
-      $html =~ s/\s+/ /sg;
-      #&main::DEBUG("$html\n===============\n");
-      # look for the first :< which should be the "To English:<", etc.
-      # strip any trailing tags, grab text that follows up to the next tag.
-      #my ($translated) = ($html =~ m{:\s*(<[^>]*>\s*)+([^<]*)}sx);
        ($translated) = $html;
-      #(undef, $translated) = ($html =~ m{(:\s+(<[^>]*>\s*)+)([^<\s]*)<}sx);
-
-      # Tim@Rikers.org get's frustrated and splits this into steps:
-      # 1) remove everything up to the first ':' in the text
-      $translated =~ s/.*?:\s*</</s;
-      # 2) remove any <attributes> till the first text
-      $translated =~ s/(<[^>]*>\s*)*//s;
-      # 3) remove the first trailing <attribute> and everything after it
-      $translated =~ s/<.*//s;
-
-      # look for contents of first textarea - not anymore cause > 40 char does not get one.
-      #my ($translated) = ($html =~ m{<textarea[^>]*>+([^<]*)}sx);
-      #&main::DEBUG("\"$translated\"\n===============\n");
-#       ($html =~ m{<textarea[^>]*>
-#               \s*
-#               ([^<]*)
-#               }sx);
-#         ($html =~ m{<br>
-#                         \s+
-#                             <font\ face="arial,\ helvetica">
-#                                 \s*
-#                                     (?:\*\*\s+time\ out\s+\*\*)?
-#                                         \s*
-#                                             ([^<]*)
-#                                             }sx);
+
+      $translated =~ s/<[^>]*>//sg;
+      $translated =~ s/&nbsp;/ /sg;
+      $translated =~ s/\s+/ /sg;
+      #&main::DEBUG("$translated\n===remove <attributes>\n");
+
+      $translated =~ s/\s*Translate again.*//i;
+      &main::DEBUG("$translated\n===remove after 'Translate again'\n");
+
+      $translated =~ s/[^:]*?:\s*(Help\s*)?//s;
+      &main::DEBUG("$translated\n===remove to first ':', optional Help\n");
+
        $translated =~ s/\n/ /g;
-      $translated =~ s/\s*$//;
-      # need a way to do unicode->iso
+      # FIXME should we do unicode->iso
    } else {
        $translated = ":("; # failure
    }
@@ -131,11 +116,11 @@ sub translate {
  
  if (0) {
      if (-t STDIN) {
-        #my $result = babel::babelfish('en','sp','hello world');
-        #my $result = babel::babelfish('en','sp','The cheese is old and moldy, where is the bathroom?');
-        my $result = babel::babelfish('en','gr','doesn\'t seem to translate things longer than 40 characters');
-        $result =~ s/; /\n/g;
-        print "Babelfish says: \"$result\"\n";
+       #my $result = babel::babelfish('en','sp','hello world');
+       #my $result = babel::babelfish('en','sp','The cheese is old and moldy, where is the bathroom?');
+       my $result = babel::babelfish('en','gr','doesn\'t seem to translate things longer than 40 characters');
+       $result =~ s/; /\n/g;
+       print "Babelfish says: \"$result\"\n";
      }
  }