git.donarmstrong.com Git - infobot.git/commitdiff
use result div in babelfish
author timriker <timriker@c11ca15a-4712-0410-83d8-924469b57eb5>
Fri, 16 May 2008 19:25:37 +0000 (19:25 +0000)
committer timriker <timriker@c11ca15a-4712-0410-83d8-924469b57eb5>
Fri, 16 May 2008 19:25:37 +0000 (19:25 +0000)
git-svn-id: https://svn.code.sf.net/p/infobot/code/trunk@1818 c11ca15a-4712-0410-83d8-924469b57eb5

src/Modules/babelfish.pl

index 8898bdaebfc03ccac4881933902faf1ab4fad6df..ed3ea1bda25b20cacbdfd22b58ca3eed777ef39c 100644 (file)
@@ -14,7 +14,8 @@ package babelfish;
 use strict;
 
 my $no_babelfish;
-my $url = 'http://babelfish.av.com/tr';
+#my $url = 'http://babelfish.av.com/tr';
+my $url = 'http://babelfish.yahoo.com/translate_txt';
 
 BEGIN {
     eval "use URI::Escape";    # utility functions for encoding the
@@ -98,27 +99,27 @@ sub translate {
     if ( $res->is_success ) {
         my $html = $res->content;
 
-        # This method subject to change with the whims of Altavista's design
-        # staff.
+        # This method subject to change with the whims of Babelfish design staff.
         ($translated) = $html;
-
-        $translated =~ s/<[^>]*>//sg;
+        # strip page head
+        $translated =~ s/.*<\/head>//sg;
+        &::DEBUG("================================\n$translated\n========================\n");
+        # convert back to spaces
         $translated =~ s/&nbsp;/ /sg;
+        # strip multiple whitespace
         $translated =~ s/\s+/ /sg;
-
-        #&::DEBUG("$translated\n===remove <attributes>\n");
-
-        $translated =~ s/\s*Translate again.*//i;
-        &::DEBUG("$translated\n===remove after 'Translate again'\n");
-
-        $translated =~ s/[^:]*?:\s*(Help\s*)?//s;
-        &::DEBUG( "len="
-              . length($translated)
-              . " $translated\n===remove to first ':', optional Help\n" );
-
+        # strip up to result
+        $translated =~ s/.*<div id="result">//sg;
+        # strip rest of page
+        $translated =~ s/<\/div.*//sg;
+        # strip all markup
+        $translated =~ s/<[^>]*>//sg;
+        # \n to space
         $translated =~ s/\n/ /g;
+        # strip multiple whitespace
+        $translated =~ s/\s+/ /sg;
 
-        # FIXME: should we do unicode->iso (no. use utf8!)
+        # FIXME: any entities to utf8?
     }
     else {
         $translated = ":(";    # failure
@@ -148,20 +149,6 @@ sub babelfish {
     return;
 }
 
-if (0) {
-    if ( -t STDIN ) {
-
-#my $result = babelfish::babelfish('en sp hello world');
-#my $result = babelfish::babelfish('en sp The cheese is old and moldy, where is the bathroom?');
-        my $result =
-          babelfish::babelfish(
-            'en gr doesn\'t seem to translate things longer than 40 characters'
-          );
-        $result =~ s/; /\n/g;
-        print "Babelfish says: \"$result\"\n";
-    }
-}
-
 1;
 
 # vim:ts=4:sw=4:expandtab:tw=80