From: timriker Date: Fri, 16 May 2008 19:25:37 +0000 (+0000) Subject: use result div in babelfish X-Git-Url: https://git.donarmstrong.com/?p=infobot.git;a=commitdiff_plain;h=45e51047c2df703229843193e49fb9829489170f use result div in babelfish git-svn-id: https://svn.code.sf.net/p/infobot/code/trunk@1818 c11ca15a-4712-0410-83d8-924469b57eb5 --- diff --git a/src/Modules/babelfish.pl b/src/Modules/babelfish.pl index 8898bda..ed3ea1b 100644 --- a/src/Modules/babelfish.pl +++ b/src/Modules/babelfish.pl @@ -14,7 +14,8 @@ package babelfish; use strict; my $no_babelfish; -my $url = 'http://babelfish.av.com/tr'; +#my $url = 'http://babelfish.av.com/tr'; +my $url = 'http://babelfish.yahoo.com/translate_txt'; BEGIN { eval "use URI::Escape"; # utility functions for encoding the @@ -98,27 +99,27 @@ sub translate { if ( $res->is_success ) { my $html = $res->content; - # This method subject to change with the whims of Altavista's design - # staff. + # This method subject to change with the whims of Babelfish design staff. ($translated) = $html; - - $translated =~ s/<[^>]*>//sg; + # strip page head + $translated =~ s/.*<\/head>//sg; + &::DEBUG("================================\n$translated\n========================\n"); + # convert &nbsp; back to spaces $translated =~ s/&nbsp;/ /sg; + # strip multiple whitespace $translated =~ s/\s+/ /sg; - - #&::DEBUG("$translated\n===remove \n"); - - $translated =~ s/\s*Translate again.*//i; - &::DEBUG("$translated\n===remove after 'Translate again'\n"); - - $translated =~ s/[^:]*?:\s*(Help\s*)?//s; - &::DEBUG( "len=" - . length($translated) - . " $translated\n===remove to first ':', optional Help\n" ); - + # strip up to result + $translated =~ s/.*<div id="result">//sg; + # strip rest of page + $translated =~ s/<\/div.*//sg; + # strip all markup + $translated =~ s/<[^>]*>//sg; + # \n to space $translated =~ s/\n/ /g; + # strip multiple whitespace + $translated =~ s/\s+/ /sg; - # FIXME: should we do unicode->iso (no. use utf8!) + # FIXME: any entities to utf8? } else { $translated = ":("; # failure @@ -148,20 +149,6 @@ sub babelfish { return; } -if (0) { - if ( -t STDIN ) { - -#my $result = babelfish::babelfish('en sp hello world'); -#my $result = babelfish::babelfish('en sp The cheese is old and moldy, where is the bathroom?'); - my $result = - babelfish::babelfish( - 'en gr doesn\'t seem to translate things longer than 40 characters' - ); - $result =~ s/; /\n/g; - print "Babelfish says: \"$result\"\n"; - } -} - 1; # vim:ts=4:sw=4:expandtab:tw=80