From: dondelelcaro
Date: Sat, 10 Jun 2006 22:37:14 +0000 (+0000)
Subject: add DebianBugs and W3Scraper
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=ebc49b9ae0376834282d98301747f22af0d4d449;p=infobot.git

add DebianBugs and W3Scraper

git-svn-id: https://svn.code.sf.net/p/infobot/code/branches/don/dpkg@1289 c11ca15a-4712-0410-83d8-924469b57eb5
---

diff --git a/src/Modules/DebianBugs.pm b/src/Modules/DebianBugs.pm
new file mode 100644
index 0000000..661247c
--- /dev/null
+++ b/src/Modules/DebianBugs.pm
@@ -0,0 +1,140 @@
+# This module is a blootbot plugin that looks up bug reports in the
+# Debian Bug Tracking System, and is released under the terms of the
+# GPL version 2, or any later version. See the file README and
+# COPYING for more information. Copyright 2002 by Don Armstrong.
+
+# $Id: $
+
+package DebianBugs;
+
+use warnings;
+use strict;
+
+use vars qw($VERSION $DEBUG);
+
+use LWP::UserAgent;
+
+$VERSION = q($Rev: $);
+$DEBUG ||= 0;
+
+# Fetch $url and return the page content, or undef on any HTTP error.
+sub get_url($){
+    my $url = shift;
+
+    my $ua = LWP::UserAgent->new;
+    $ua->agent("blootbug_debbugs/$VERSION");
+
+    # Create a request
+    my $req = HTTP::Request->new(GET => $url);
+    # Pass request to the user agent and get a response back
+    my $res = $ua->request($req);
+    # Check the outcome of the response
+    if ($res->is_success) {
+        return $res->content;
+    } else {
+        return undef;
+    }
+}
+
+# Return a one-line summary of Debian bug $bug_num scraped from
+# bugreport.cgi. If $options->{return_warnings} is set, error
+# messages are returned to the caller instead of warn()ed.
+sub bug_info($;$){
+    my $bug_num = shift;
+    my $options = shift || {};
+
+    if (not $bug_num =~ /^\#?\d+$/) {
+        warn "Bug is not a number!" and return undef if not $options->{return_warnings};
+        return "Bug is not a number!";
+    }
+    $bug_num =~ s/^\#//;
+    my $report = get_url("http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=$bug_num");
+
+    # BUGFIX: guard against a failed fetch; matching against undef
+    # would only warn and then report garbage from a stale $1.
+    if (not defined $report) {
+        warn "Unable to retrieve bug report!" and return undef if not $options->{return_warnings};
+        return "Unable to retrieve bug report!";
+    }
+
+    # strip down report to relevant header information.
+    # NOTE(review): the <hr> terminator was reconstructed from an
+    # HTML-mangled copy of this file; verify against upstream.
+    $report =~ /(.+?)<hr>/is;
+    $report = $1;
+    my $bug = {};
+    ($bug->{num},$bug->{title}) = $report =~ m#\#(\d+)\<\/A\>\<br\>(.+?)\<\/H1\>#is;
+    if ($DEBUG) {
+        print "Bugnum: $bug->{num}\nTitle: $bug->{title}\nReport: $report\n";
+    }
+    # Decode the HTML entities in the title.
+    $bug->{title} =~ s/&lt;/</g;
+    $bug->{title} =~ s/&gt;/>/g;
+    $bug->{title} =~ s/&quot;/"/g;
+    $bug->{severity} = 'n'; # Default severity is normal
+    # NOTE(review): the split pattern was reconstructed from an
+    # HTML-mangled copy of this file; verify against upstream.
+    my @bug_flags = split /(?=\<br\>)/i, $report;
+    foreach my $bug_flag (@bug_flags) {
+        if ($bug_flag =~ /Severity:/) {
+            # BUGFIX: critical was missing from the severity list,
+            # leaving severity undef for critical bugs.
+            ($bug->{severity}) = $bug_flag =~ /(wishlist|minor|normal|important|serious|grave|critical)/i;
+            # Just leave the letter instead of the whole thing.
+            $bug->{severity} =~ s/^(.).+$/$1/;
+        }
+        elsif ($bug_flag =~ /Package:/) {
+            ($bug->{package}) = $bug_flag =~ /\"\>\s*([^\<\>\"]+?)\s*\<\/a\>/;
+        }
+        elsif ($bug_flag =~ /Reported by:/) {
+            ($bug->{reporter}) = $bug_flag =~ /\"\>\s*(.+?)\s*\<\/a\>/;
+            # strip &lt; and &gt;
+            $bug->{reporter} =~ s/&lt;/</g;
+            $bug->{reporter} =~ s/&gt;/>/g;
+        }
+        elsif ($bug_flag =~ /Date:/) {
+            ($bug->{date}) = $bug_flag =~ /Date:\s*(\w.+?)\s*$/;
+            # ditch extra whitespace
+            # BUGFIX: /g so every run of whitespace is collapsed,
+            # not just the first.
+            $bug->{date} =~ s/\s{2,}/\ /g;
+        }
+        elsif ($bug_flag =~ /Tags:/) {
+            ($bug->{tags}) = $bug_flag =~ /strong\>\s*(.+?)\s*\<\/strong\>/;
+        }
+        elsif ($bug_flag =~ /merged with /) {
+            $bug_flag =~ s/merged with\s*//;
+            $bug_flag =~ s/\<[^\>]+\>//g;
+            $bug_flag =~ s/\s//sg;
+            $bug->{merged_with} = $bug_flag;
+        }
+        elsif ($bug_flag =~ /\>Done:\</) {
+            $bug->{done} = 1;
+        }
+        elsif ($bug_flag =~ /\>Fixed\</) {
+            $bug->{done} = 1;
+        }
+    }
+    # report bug
+    $report = '';
+    $report .= 'DONE:' if defined $bug->{done} and $bug->{done};
+    $report .= '#'.$bug->{num}.':'.uc($bug->{severity}).'['.$bug->{package}.'] '.$bug->{title};
+    $report .= ' ('.$bug->{tags}.')' if defined $bug->{tags};
+    $report .= '; ' . $bug->{date};
+    # Avoid reporting so many merged bugs; show at most three.
+    if (defined $bug->{merged_with}) {
+        my @merged = split /,/, $bug->{merged_with};
+        $report .= ' ['.join(',',splice(@merged,0,3)).']';
+    }
+    if ($DEBUG) {
+        use Data::Dumper;
+        print STDERR Dumper($bug);
+    }
+    return $report;
+}
+
+# Not yet implemented: report the bugs filed against a package.
+sub package_bugs($){
+
+}
+
+1;
+
+
+__END__
diff --git a/src/Modules/W3Scraper.pl b/src/Modules/W3Scraper.pl
new file mode 100644
index 0000000..3c59e4a
--- /dev/null
+++ b/src/Modules/W3Scraper.pl
@@ -0,0 +1,90 @@
+# WWW::Scraper backend, replaces W3Search.pl functionality.
+# Uses WWW::Scraper and associated modules instead of WWW::Search;
+
+# Written by Don Armstrong
+
+# $Id$
+
+package W3Scraper;
+
+=head1 NAME
+
+W3Scraper - blootbot plugin to do searches using WWW::Scraper
+
+=head1 SYNOPSIS
+
+
+
+=head1 DESCRIPTION
+
+
+
+=cut
+
+use warnings;
+use strict;
+use vars qw ($VERSION);
+
+# Maximum number of urls to show in a single reply.
+my $maxshow = 5;
+
+# Search for $what using the WWW::Scraper backend matching $where and
+# reply (via the bot) with up to $maxshow urls. $type is unused.
+sub W3Scraper {
+    my ($where,$what,$type) = @_;
+
+    # rip out the available engines by brute force.
+    # BUGFIX: readdir instead of qx(ls ...), and \Q...\E so user
+    # input cannot inject regex metacharacters (the bare . in
+    # /$where.pm/ also matched any character).
+    my @matches;
+    if (opendir my $dh, '/usr/share/perl5/WWW/Scraper') {
+        @matches = grep {/\Q$where\E\.pm/i and !/FieldTranslation/i and !/Re(sponse|quest)/i and !/TidyXML/i} readdir $dh;
+        closedir $dh;
+    }
+
+    if (@matches) {
+        $where = shift @matches;
+        $where =~ s/\.pm//;
+    }
+    else {
+        &::msg($::who, "i don't know how to check '$where'");
+        return;
+    }
+
+    return unless &::loadPerlModule("WWW::Scraper");
+
+    my $scraper;
+    eval {
+        # BUGFIX: direct method call instead of indirect object syntax.
+        $scraper = WWW::Scraper->new($where,$what);
+    };
+    if (not defined $scraper) {
+        &::msg($::who,"$where is an invalid search.");
+        return;
+    }
+
+    my $count = 0;
+    my $results = q();
+    while (my $result = $scraper->next_response()) {
+        next if not defined $result->url or not defined ${$result->url};
+        next if ((length ${$result->url}) > 80); # ignore long urls
+        if ($count > 0) {
+            $results .= ' or ';
+        }
+        $results .= ${$result->url};
+        # BUGFIX: was "> $maxshow", which showed one url too many.
+        last if ++$count >= $maxshow;
+    }
+    if ($count > 0) {
+        $results = qq($where says "$what" is at $results [).
+            $scraper->approximate_result_count().
+            q( results]);
+    }
+    else {
+        $results = qq($where was unable to find "$what");
+    }
+    &::performStrictReply($results);
+}
+
+
+1;