#
# Freshmeat.pl: Frontend to www.freshmeat.net
# Author: dms
-# Version: v0.7c (20000606)
+# Version: v0.7d (20000923)
# Created: 19990930
#
### download compressed version instead?
my %urls = (
- 'public' => 'http://core.freshmeat.net/backend/appindex.txt',
+ 'public' => 'http://www.freshmeat.net/backend/appindex.txt',
'private' => 'http://feed.freshmeat.net/appindex/appindex.txt',
);
&downloadIndex();
&Freshmeat($sstr);
} );
+ # both the parent and the forked child run here, in case the following looks weird.
return if ($$ == $main::bot_pid);
}
if (!&showPackage($sstr)) { # no exact match.
- my $start_time = &main::gettimeofday();
+ my $start_time = &main::timeget();
my %hash;
- # search by key first.
+ # search by key/NAME first.
foreach (&main::searchTable("freshmeat", "name","name",$sstr)) {
$hash{$_} = 1 unless exists $hash{$_};
}
+ # search by description line.
foreach (&main::searchTable("freshmeat", "name","oneliner", $sstr)) {
$hash{$_} = 1 unless exists $hash{$_};
last if (scalar keys %hash > 15);
my @list = keys %hash;
# search by value, if we have enough room to do it.
if (scalar @list == 1) {
- &main::DEBUG("only one partial match found; showing full info.");
+ &main::status("only one match found; showing full info.");
&showPackage($list[0]);
return;
}
# show how long it took.
- my $delta_time = &main::gettimeofday() - $start_time;
+ my $delta_time = &main::timedelta($start_time);
&main::status(sprintf("freshmeat: %.02f sec to complete query.", $delta_time)) if ($delta_time > 0);
for (@list) {
}
sub downloadIndex {
- my $start_time = &main::gettimeofday(); # set the start time.
- my $idx = "$main::bot_base_dir/Temp/fm_index.txt";
+ my $start_time = &main::timeget(); # set the start time.
+ my $idx = "$main::param{tempDir}/fm_index.txt";
&main::msg($main::who, "Updating freshmeat index... please wait");
if (&main::isStale($idx, 1)) {
&main::status("Freshmeat: fetching data.");
foreach (keys %urls) {
- &main::DEBUG("FM: urls{$_} => '$urls{$_}'.");
my $retval = &main::getURLAsFile($urls{$_}, $idx);
- next if ($retval eq "403");
+ next if ($retval =~ /^(403|500)$/);
+
&main::DEBUG("FM: last! retval => '$retval'.");
last;
}
return;
}
- ### TODO: do not dump full contents to an array.
- ### => process on the fly instead but how?
- open(IN, $idx);
+ if ($idx =~ /bz2$/) {
+ open(IN, "bzcat $idx |");
+ } elsif ($idx =~ /gz$/) {
+ open(IN, "gzcat $idx |");
+ } else {
+ open(IN, $idx);
+ }
# delete the table before we redo it.
&main::deleteTable("freshmeat");
&main::dbRaw("LOCK", "LOCK TABLES freshmeat WRITE");
my @data;
+ my @done;
while (my $line = <IN>) {
chop $line;
if ($line ne "%%") {
next;
}
- if ($i % 100 == 0 and $i != 0) {
+ if ($i % 200 == 0 and $i != 0) {
&main::DEBUG("FM: unlocking and locking.");
&main::dbRaw("UNLOCK", "UNLOCK TABLES");
- sleep 1; # another lame hack to "prevent" errors.
+ ### another lame hack to "prevent" errors.
+ select(undef, undef, undef, 0.2);
&main::dbRaw("LOCK", "LOCK TABLES freshmeat WRITE");
}
+ if (grep /^\Q$data[0]\E$/, @done) {
+ &main::DEBUG("dupe? $data[0]");
+ @data = ();
+ next;
+ }
+
$i++;
pop @data;
$data[1] ||= "none";
$data[2] ||= "none";
&main::dbSetRow("freshmeat", @data);
+ push(@done,$data[0]);
@data = ();
}
close IN;
&main::DEBUG("FM: data ".scalar(@data) );
&main::dbRaw("UNLOCK", "UNLOCK TABLES");
- my $delta_time = &main::gettimeofday() - $start_time;
+ my $delta_time = &main::timedelta($start_time);
&main::status(sprintf("Freshmeat: %.02f sec to complete.", $delta_time)) if ($delta_time > 0);
my $count = &main::countKeys("freshmeat");
}
sub freshmeatAnnounce {
- my $file = "$main::bot_base_dir/Temp/fm_recent.txt";
+ my $file = "$main::param{tempDir}/fm_recent.txt";
my @old;
+ ### if file exists, let's read it.
if ( -f $file) {
open(IN, $file);
while (<IN>) {
push(@now, $what);
}
- ### ...
-
+ ### if file does not exist, write new.
if (! -f $file) {
open(OUT, ">$file");
foreach (@now) {
&main::notice($_, $line);
}
+ ### output new file.
open(OUT, ">$file");
foreach (@now) {
print OUT "$_\n";