From: dondelelcaro
Date: Sat, 10 Jun 2006 22:37:14 +0000 (+0000)
Subject: add DebianBugs and W3Scraper
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=ebc49b9ae0376834282d98301747f22af0d4d449;p=infobot.git

add DebianBugs and W3Scraper

git-svn-id: https://svn.code.sf.net/p/infobot/code/branches/don/dpkg@1289 c11ca15a-4712-0410-83d8-924469b57eb5
---

diff --git a/src/Modules/DebianBugs.pm b/src/Modules/DebianBugs.pm
new file mode 100644
index 0000000..661247c
--- /dev/null
+++ b/src/Modules/DebianBugs.pm
@@ -0,0 +1,140 @@
+# This module is a blootbot plugin that looks up bug reports in the
+# Debian Bug Tracking System, and is released under the terms of the
+# GPL version 2, or any later version. See the file README and
+# COPYING for more information. Copyright 2002 by Don Armstrong.
+
+# $Id: $
+
+package DebianBugs;
+
+use warnings;
+use strict;
+
+use vars qw($VERSION $DEBUG);
+
+use LWP::UserAgent;
+
+$VERSION = q($Rev: $);
+$DEBUG ||= 0;
+
+# Fetch $url and return the page content, or undef on any HTTP error.
+sub get_url($){
+    my $url = shift;
+
+    my $ua = LWP::UserAgent->new;
+    $ua->agent("blootbug_debbugs/$VERSION");
+
+    # Create a request
+    my $req = HTTP::Request->new(GET => $url);
+    # Pass request to the user agent and get a response back
+    my $res = $ua->request($req);
+    # Check the outcome of the response
+    if ($res->is_success) {
+        return $res->content;
+    } else {
+        return undef;
+    }
+}
+
+# Return a one-line summary of Debian bug $bug_num scraped from
+# bugreport.cgi. If $options->{return_warnings} is set, error
+# messages are returned to the caller instead of warn()ed.
+sub bug_info($;$){
+    my $bug_num = shift;
+    my $options = shift || {};
+
+    if (not $bug_num =~ /^\#?\d+$/) {
+        warn "Bug is not a number!" and return undef if not $options->{return_warnings};
+        return "Bug is not a number!";
+    }
+    $bug_num =~ s/^\#//;
+    my $report = get_url("http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=$bug_num");
+
+    # BUGFIX: guard against a failed fetch; matching against undef
+    # would only warn and then report garbage from a stale $1.
+    if (not defined $report) {
+        warn "Unable to retrieve bug report!" and return undef if not $options->{return_warnings};
+        return "Unable to retrieve bug report!";
+    }
+
+    # strip down report to relevant header information.
+    # NOTE(review): the <hr> terminator was reconstructed from an
+    # HTML-mangled copy of this file; verify against upstream.
+    $report =~ /(.+?)<hr>/is;
+    $report = $1;
+    my $bug = {};
+    ($bug->{num},$bug->{title}) = $report =~ m#\#(\d+)\<\/A\>\<br\>(.+?)\<\/H1\>#is;
+    if ($DEBUG) {
+        print "Bugnum: $bug->{num}\nTitle: $bug->{title}\nReport: $report\n";
+    }
+    # Decode the HTML entities in the title.
+    $bug->{title} =~ s/&lt;/</g;
+    $bug->{title} =~ s/&gt;/>/g;
+    $bug->{title} =~ s/&quot;/"/g;
+    $bug->{severity} = 'n'; # Default severity is normal
+    # NOTE(review): the split pattern was reconstructed from an
+    # HTML-mangled copy of this file; verify against upstream.
+    my @bug_flags = split /(?=\<br\>)/i, $report;
+    foreach my $bug_flag (@bug_flags) {
+        if ($bug_flag =~ /Severity:/) {
+            # BUGFIX: critical was missing from the severity list,
+            # leaving severity undef for critical bugs.
+            ($bug->{severity}) = $bug_flag =~ /(wishlist|minor|normal|important|serious|grave|critical)/i;
+            # Just leave the letter instead of the whole thing.
+            $bug->{severity} =~ s/^(.).+$/$1/;
+        }
+        elsif ($bug_flag =~ /Package:/) {
+            ($bug->{package}) = $bug_flag =~ /\"\>\s*([^\<\>\"]+?)\s*\<\/a\>/;
+        }
+        elsif ($bug_flag =~ /Reported by:/) {
+            ($bug->{reporter}) = $bug_flag =~ /\"\>\s*(.+?)\s*\<\/a\>/;
+            # strip &lt; and &gt;
+            $bug->{reporter} =~ s/&lt;/</g;
+            $bug->{reporter} =~ s/&gt;/>/g;
+        }
+        elsif ($bug_flag =~ /Date:/) {
+            ($bug->{date}) = $bug_flag =~ /Date:\s*(\w.+?)\s*$/;
+            # ditch extra whitespace
+            # BUGFIX: /g so every run of whitespace is collapsed,
+            # not just the first.
+            $bug->{date} =~ s/\s{2,}/\ /g;
+        }
+        elsif ($bug_flag =~ /Tags:/) {
+            ($bug->{tags}) = $bug_flag =~ /strong\>\s*(.+?)\s*\<\/strong\>/;
+        }
+        elsif ($bug_flag =~ /merged with /) {
+            $bug_flag =~ s/merged with\s*//;
+            $bug_flag =~ s/\<[^\>]+\>//g;
+            $bug_flag =~ s/\s//sg;
+            $bug->{merged_with} = $bug_flag;
+        }
+        elsif ($bug_flag =~ /\>Done:\</) {
+            $bug->{done} = 1;
+        }
+        elsif ($bug_flag =~ /\>Fixed\</) {
+            $bug->{done} = 1;
+        }
+    }
+    # report bug
+    $report = '';
+    $report .= 'DONE:' if defined $bug->{done} and $bug->{done};
+    $report .= '#'.$bug->{num}.':'.uc($bug->{severity}).'['.$bug->{package}.'] '.$bug->{title};
+    $report .= ' ('.$bug->{tags}.')' if defined $bug->{tags};
+    $report .= '; ' . $bug->{date};
+    # Avoid reporting so many merged bugs; show at most three.
+    if (defined $bug->{merged_with}) {
+        my @merged = split /,/, $bug->{merged_with};
+        $report .= ' ['.join(',',splice(@merged,0,3)).']';
+    }
+    if ($DEBUG) {
+        use Data::Dumper;
+        print STDERR Dumper($bug);
+    }
+    return $report;
+}
+
+# Not yet implemented: report the bugs filed against a package.
+sub package_bugs($){
+
+}
+
+1;
+
+
+__END__
diff --git a/src/Modules/W3Scraper.pl b/src/Modules/W3Scraper.pl
new file mode 100644
index 0000000..3c59e4a
--- /dev/null
+++ b/src/Modules/W3Scraper.pl
@@ -0,0 +1,90 @@
+# WWW::Scraper backend, replaces W3Search.pl functionality.
+# Uses WWW::Scraper and associated modules instead of WWW::Search;
+
+# Written by Don Armstrong
+
+# $Id$
+
+package W3Scraper;
+
+=head1 NAME
+
+W3Scraper - blootbot plugin to do searches using WWW::Scraper
+
+=head1 SYNOPSIS
+
+
+
+=head1 DESCRIPTION
+
+
+
+=cut
+
+use warnings;
+use strict;
+use vars qw ($VERSION);
+
+# Maximum number of urls to show in a single reply.
+my $maxshow = 5;
+
+# Search for $what using the WWW::Scraper backend matching $where and
+# reply (via the bot) with up to $maxshow urls. $type is unused.
+sub W3Scraper {
+    my ($where,$what,$type) = @_;
+
+    # rip out the available engines by brute force.
+    # BUGFIX: readdir instead of qx(ls ...), and \Q...\E so user
+    # input cannot inject regex metacharacters (the bare . in
+    # /$where.pm/ also matched any character).
+    my @matches;
+    if (opendir my $dh, '/usr/share/perl5/WWW/Scraper') {
+        @matches = grep {/\Q$where\E\.pm/i and !/FieldTranslation/i and !/Re(sponse|quest)/i and !/TidyXML/i} readdir $dh;
+        closedir $dh;
+    }
+
+    if (@matches) {
+        $where = shift @matches;
+        $where =~ s/\.pm//;
+    }
+    else {
+        &::msg($::who, "i don't know how to check '$where'");
+        return;
+    }
+
+    return unless &::loadPerlModule("WWW::Scraper");
+
+    my $scraper;
+    eval {
+        # BUGFIX: direct method call instead of indirect object syntax.
+        $scraper = WWW::Scraper->new($where,$what);
+    };
+    if (not defined $scraper) {
+        &::msg($::who,"$where is an invalid search.");
+        return;
+    }
+
+    my $count = 0;
+    my $results = q();
+    while (my $result = $scraper->next_response()) {
+        next if not defined $result->url or not defined ${$result->url};
+        next if ((length ${$result->url}) > 80); # ignore long urls
+        if ($count > 0) {
+            $results .= ' or ';
+        }
+        $results .= ${$result->url};
+        # BUGFIX: was "> $maxshow", which showed one url too many.
+        last if ++$count >= $maxshow;
+    }
+    if ($count > 0) {
+        $results = qq($where says "$what" is at $results [).
+            $scraper->approximate_result_count().
+            q( results]);
+    }
+    else {
+        $results = qq($where was unable to find "$what");
+    }
+    &::performStrictReply($results);
+}
+
+
+1;