#
# Freshmeat.pl: Frontend to www.freshmeat.net
# Author: dms
-# Version: v0.7c (20000606)
+# Version: v0.7d (20000923)
# Created: 19990930
#
package Freshmeat;
use strict;
-
-### download compressed version instead?
+use vars qw(@cols @data $string %pkg $i $locktime);
my %urls = (
- 'public' => 'http://core.freshmeat.net/backend/appindex.txt',
- 'private' => 'http://feed.freshmeat.net/appindex/appindex.txt',
+ 'public' => 'http://www.freshmeat.net/backend/fm-projects.rdf.bz2',
+# 'private' => 'http://feed.freshmeat.net/appindex/appindex.txt',
);
####
# Usage: &Freshmeat($string);
sub Freshmeat {
my $sstr = lc($_[0]);
- my $refresh = $main::param{'freshmeatRefreshInterval'} * 60 * 60;
+ my $refresh = &::getChanConfDefault("freshmeatRefreshInterval",
+ "", 24) * 60 * 60 * 7;
- my $last_refresh = &main::dbGet("freshmeat", "name","_","stable");
+ my $last_refresh = &::dbGet("freshmeat", "latest_version", "projectname_short='_'");
my $renewtable = 0;
- if (defined $last_refresh) {
+ if (defined $last_refresh and $last_refresh =~ /^\d+$/) {
$renewtable++ if (time() - $last_refresh > $refresh);
} else {
$renewtable++;
}
- $renewtable++ if (&main::countKeys("freshmeat") < 10);
+ $renewtable++ if (&::countKeys("freshmeat") < 1000);
- if ($renewtable and $$ == $main::blootbot_pid) {
- &main::Forker("freshmeat", sub {
+ if ($renewtable) {
+ if ($$ == $::bot_pid) {
+ &::Forker("freshmeat", sub {
&downloadIndex();
&Freshmeat($sstr);
- } );
- return if ($$ == $main::bot_pid);
+ } );
+ # both parent/fork runs here, in case the following looks weird.
+ } else {
+ &downloadIndex();
+ }
+
+ return if ($$ == $::bot_pid);
}
if (!&showPackage($sstr)) { # no exact match.
- my $start_time = &main::gettimeofday();
+ my $start_time = &::timeget();
my %hash;
- # search by key first.
- foreach (&main::searchTable("freshmeat", "name","name",$sstr)) {
+ # search by key/NAME first.
+ foreach (&::searchTable("freshmeat", "projectname_short", "projectname_short",$sstr)) {
$hash{$_} = 1 unless exists $hash{$_};
}
- foreach (&main::searchTable("freshmeat", "name","oneliner", $sstr)) {
+ # search by description line.
+ foreach (&::searchTable("freshmeat", "projectname_short", "desc_short", $sstr)) {
$hash{$_} = 1 unless exists $hash{$_};
last if (scalar keys %hash > 15);
}
my @list = keys %hash;
# search by value, if we have enough room to do it.
if (scalar @list == 1) {
- &main::DEBUG("only one partial match found; showing full info.");
+ &::status("only one match found; showing full info.");
&showPackage($list[0]);
return;
}
# show how long it took.
- my $delta_time = &main::gettimeofday() - $start_time;
- &main::status(sprintf("freshmeat: %.02f sec to complete query.", $delta_time)) if ($delta_time > 0);
+ my $delta_time = &::timedelta($start_time);
+ &::status(sprintf("freshmeat: %.02f sec to complete query.", $delta_time)) if ($delta_time > 0);
for (@list) {
tr/A-Z/a-z/;
s/([\,\;]+)/\037$1\037/g;
}
- &main::performStrictReply( &main::formListReply(1, "Freshmeat ", @list) );
+ &::performStrictReply( &::formListReply(1, "Freshmeat ", @list) );
}
}
sub showPackage {
my ($pkg) = @_;
- my @fm = &main::dbGet("freshmeat", "name",$pkg,"*");
+ my @fm = &::dbGet("freshmeat", "*", "projectname_short='$pkg'");
if (scalar @fm) { #1: perfect match of name.
my $retval;
- $retval = "$fm[0] \002(\002$fm[11]\002)\002, ";
- $retval .= "section $fm[3], ";
- $retval .= "is $fm[4]. ";
- $retval .= "Stable: \002$fm[1]\002, ";
- $retval .= "Development: \002$fm[2]\002. ";
- $retval .= $fm[5] || $fm[6]; # fallback to 'download'.
- $retval .= " deb: ".$fm[8] if ($fm[8] ne ""); # 'deb'.
- &main::performStrictReply($retval);
+ $retval = "$fm[0] \002(\002$fm[5]\002)\002, ";
+# $retval .= "section $fm[3], ";
+ $retval .= "is $fm[2]. ";
+ $retval .= "Version: \002$fm[1]\002, ";
+# $retval .= "Development: \002$fm[2]\002. ";
+ $retval .= $fm[4];
+### ???
+# $retval .= " deb: ".$fm[3] if ($fm[3] ne ""); # 'deb'.
+ &::performStrictReply($retval);
return 1;
} else {
return 0;
}
}
+sub randPackage {
+ my @fm = &::randKey("freshmeat","*");
+
+ if (scalar @fm) { #1: perfect match of name.
+ my $retval;
+ $retval = "$fm[0] \002(\002$fm[5]\002)\002, ";
+# $retval .= "section $fm[3], ";
+ $retval .= "is $fm[2]. ";
+ $retval .= "Version: \002$fm[1]\002, ";
+# $retval .= "Development: \002$fm[2]\002. ";
+ $retval .= $fm[4];
+### ???
+# $retval .= " deb: ".$fm[3] if ($fm[3] ne ""); # 'deb'.
+
+ return $retval;
+ } else {
+ return;
+ }
+}
+
sub downloadIndex {
- my $start_time = &main::gettimeofday(); # set the start time.
- my $idx = "$main::bot_base_dir/Temp/fm_index.txt";
+ my $start_time = &::timeget(); # set the start time.
+ my $idx = "$::param{tempDir}/fm-projects.rdf.bz2";
- &main::msg($main::who, "Updating freshmeat index... please wait");
+ if (!&::loadPerlModule("XML::Parser")) {
+ &::WARN("don't have xml::parser...");
+ return;
+ }
+ my $p = new XML::Parser(Style => 'Objects');
+ my %pkg;
+ my $string;
+
+ $p->setHandlers(
+ Char => \&xml_text,
+ End => \&xml_end,
+ );
+
+ &::msg($::who, "Updating freshmeat index... please wait");
+
+ if (&::isStale($idx, 1)) {
+ &::status("Freshmeat: fetching data.");
- if (&main::isStale($idx, 1)) {
- &main::status("Freshmeat: fetching data.");
foreach (keys %urls) {
- &main::DEBUG("FM: urls{$_} => '$urls{$_}'.");
- my $retval = &main::getURLAsFile($urls{$_}, $idx);
- next if ($retval eq "403");
- &main::DEBUG("FM: last! retval => '$retval'.");
+ $urls{$_} =~ /^.*\/(.*)$/;
+ $idx = "$::param{tempDir}/$1";
+ my $retval = &::getURLAsFile($urls{$_}, $idx);
+ next if ($retval =~ /^(403|500)$/);
+
+ &::DEBUG("FM: last! retval => '$retval'.");
last;
}
} else {
- &main::status("Freshmeat: local file hack.");
+ &::status("Freshmeat: local file hack.");
}
if (! -e $idx) {
- &main::msg($main::who, "the freshmeat butcher is closed.");
+ &::msg($::who, "the freshmeat butcher is closed.");
return;
}
if ( -s $idx < 100000) {
- &main::DEBUG("FM: index too small?");
+ &::DEBUG("FM: index too small?");
unlink $idx;
- &main::msg($main::who, "internal error?");
+ &::msg($::who, "internal error?");
return;
}
- ### TODO: do not dump full contents to an array.
- ### => process on the fly instead but how?
- open(IN, $idx);
+ if ($idx =~ /bz2$/) {
+ open(IN, "bzcat $idx |");
+ } elsif ($idx =~ /gz$/) {
+ open(IN, "gzcat $idx |");
+ } else {
+ open(IN, $idx);
+ }
# delete the table before we redo it.
- &main::deleteTable("freshmeat");
+ &::deleteTable("freshmeat");
### lets get on with business.
# set the last refresh time. fixes multiple spawn bug.
- &main::dbSet("freshmeat", "name","_","stable",time());
-
- my $i = 0;
- while (my $line = <IN>) {
- chop $line;
- $i++ if ($line eq "%%");
- last if ($i == 2);
- }
-
- &main::dbRaw("LOCK", "LOCK TABLES freshmeat WRITE");
- my @data;
- while (my $line = <IN>) {
- chop $line;
- if ($line ne "%%") {
- push(@data,$line);
- next;
+ &::dbSet("freshmeat",
+ { "projectname_short" => "_" },
+ { "latest_version" => time() }
+ );
+
+ &::dbRaw("LOCK", "LOCK TABLES freshmeat WRITE");
+ @cols = &::dbGetColInfo("freshmeat");
+
+ $locktime = time();
+
+ # this mess is to not dump IN to memory.
+ $_ = <IN>;
+ $_ = <IN>;
+ $_ = <IN>;
+
+ my $str;
+ while (<IN>) {
+ chop;
+
+ $str .= $_;
+
+ next unless (/<\/project>/);
+
+ # XML::Parser's parse() doesn't like the following.
+ # but parsefile() does... why!
+ for ($str) {
+ s/®/_/g;
+ s/ô//g;
+ s/"//g;
+ s/é/e/g;
+ s/à/a/g;
+ s/í/i/g;
+ s/­/_/g; # ???
+ s/´/a/g;
+ s/»/_/g; # ???
+ s/«/_/g; # ???
+ s/©/[C]/g;
+ s/°/deg/g;
+ s/Æ/A/g;
+ s/\cN//g; # fucking openbsd morons.
+ s/ /-/g;
+ s/ö/o/g;
+ s/¶//g; # ???
+ s/ã//g;
+ s/\cM/ /g; # stupid windows morons
+ s/²/square/g;
+ s/ü/?/g;
+ s/µ/u/g;
+ s/æ/a/g;
+ s/ø/o/g;
+ s/ð/e/g;
+ s/ß//g;
+ s/·//g;
}
- if ($i % 100 == 0 and $i != 0) {
- &main::DEBUG("FM: unlocking and locking.");
- &main::dbRaw("UNLOCK", "UNLOCK TABLES");
- sleep 1; # another lame hack to "prevent" errors.
- &main::dbRaw("LOCK", "LOCK TABLES freshmeat WRITE");
+ if (0 and $str =~ s/\&(\S+?);//g) {
+ &::DEBUG("fm: sarred $1 to ''.");
}
- $i++;
- pop @data;
- $data[1] ||= "none";
- $data[2] ||= "none";
- &main::dbSetRow("freshmeat", @data);
- @data = ();
+ $p->parse($str, ProtocolEncoding => 'ISO-8859-1');
+ $str = "";
}
close IN;
- &main::DEBUG("FM: data ".scalar(@data) );
- &main::dbRaw("UNLOCK", "UNLOCK TABLES");
- my $delta_time = &main::gettimeofday() - $start_time;
- &main::status(sprintf("Freshmeat: %.02f sec to complete.", $delta_time)) if ($delta_time > 0);
+ &::dbRaw("UNLOCK", "UNLOCK TABLES");
- my $count = &main::countKeys("freshmeat");
- &main::status("Freshmeat: $count entries loaded.");
+ my $delta_time = &::timedelta($start_time);
+ &::status(sprintf("Freshmeat: %.02f sec to complete.", $delta_time)) if ($delta_time > 0);
+
+ my $count = &::countKeys("freshmeat");
+ &::status("Freshmeat: $count entries loaded.");
}
sub freshmeatAnnounce {
- my $file = "$main::bot_base_dir/Temp/fm_recent.txt";
+ my $file = "$::param{tempDir}/fm_recent.txt";
my @old;
+ ### if file exists, lets read it.
if ( -f $file) {
open(IN, $file);
while (<IN>) {
close IN;
}
- my @array = &main::getURL("http://core.freshmeat.net/backend/recentnews.txt");
+ my @array = &::getURL("http://core.freshmeat.net/backend/recentnews.txt");
my @now;
while (@array) {
push(@now, $what);
}
- ### ...
-
+ ### if file does not exist, write new.
if (! -f $file) {
open(OUT, ">$file");
foreach (@now) {
}
if (!scalar @new) {
- &main::DEBUG("fA: no new items.");
+ &::DEBUG("fA: no new items.");
return;
}
- my $chan;
- my @chans = split(/[\s\t]+/, lc $main::param{'freshmeatAnnounce'});
- @chans = keys(%main::channels) unless (scalar @chans);
-
- my $line = "Freshmeat update: ".join(" \002::\002 ", @new);
- foreach (@chans) {
- next unless (&main::validChan($_));
-
- &main::status("sending freshmeat update to $_.");
- &main::notice($_, $line);
- }
-
+ ### output new file.
open(OUT, ">$file");
foreach (@now) {
print OUT "$_\n";
}
close OUT;
+
+ return "Freshmeat update: ".join(" \002::\002 ", @new);
+}
+
+
+sub xml_text {
+ my ($e,$t) = @_;
+ return if ($t =~ /^\s*$/);
+
+ $string = $t;
+}
+
+sub xml_end {
+ my($expat,$text) = @_;
+
+ $pkg{$text} = $string;
+
+ if ($expat->depth == 0) {
+ for (my $j=0; $j<scalar @cols; $j++) {
+ $data[$j] = $pkg{ $cols[$j] };
+ }
+ $i++;
+
+ &::dbSetRow("freshmeat", @data);
+ undef @data;
+ undef %pkg;
+
+ if ($i % 200 == 0 and $i != 0) {
+ &::showProc();
+ &::status("FM: unlocking and locking ($i): ".
+ &::Time2String( time() - $locktime ) );
+ $locktime = time();
+
+ # I think the following leaks 120k of memory each time it's
+ # called... the wonders of libmysql-perl leaking!
+
+ &::dbRaw("UNLOCK", "UNLOCK TABLES");
+ ### another lame hack to "prevent" errors.
+ select(undef, undef, undef, 0.2);
+ &::dbRaw("LOCK", "LOCK TABLES freshmeat WRITE");
+ }
+ }
}
1;