]> git.donarmstrong.com Git - infobot.git/blobdiff - src/Modules/Freshmeat.pl
- bot stats: "blah has blah... is ranked xx/yy (zz percentile)"
[infobot.git] / src / Modules / Freshmeat.pl
index 41c013b679d61925354066eb94e2ce16260b80fe..269eb3e019d29e1a05240aaaf12b6a1ea12c1acf 100644 (file)
@@ -22,7 +22,7 @@ sub Freshmeat {
     my $refresh        = &::getChanConfDefault("freshmeatRefreshInterval",
                        "", 24) * 60 * 60 * 7;
 
-    my $last_refresh = &::dbGet("freshmeat", "projectname_short", "_", "latest_version");
+    my $last_refresh = &::dbGet("freshmeat", "latest_version", "projectname_short='_'");
     my $renewtable   = 0;
 
     if (defined $last_refresh and $last_refresh =~ /^\d+$/) {
@@ -84,7 +84,8 @@ sub Freshmeat {
 
 sub showPackage {
     my ($pkg)  = @_;
-    my @fm     = &::dbGet("freshmeat", "projectname_short", $pkg, "*");
+    my @fm     = &::dbGet("freshmeat", "*",
+                       "projectname_short=".&::dbQuote($pkg) );
 
     if (scalar @fm) {          #1: perfect match of name.
        my $retval;
@@ -108,13 +109,14 @@ sub randPackage {
 
     if (scalar @fm) {          #1: perfect match of name.
        my $retval;
-       $retval  = "$fm[0] \002(\002$fm[11]\002)\002, ";
-       $retval .= "section $fm[3], ";
-       $retval .= "is $fm[4]. ";
-       $retval .= "Stable: \002$fm[1]\002, ";
-       $retval .= "Development: \002$fm[2]\002. ";
-       $retval .= $fm[5] || $fm[6];             # fallback to 'download'.
-       $retval .= " deb: ".$fm[8] if ($fm[8] ne ""); # 'deb'.
+       $retval  = "$fm[0] \002(\002$fm[5]\002)\002, ";
+#      $retval .= "section $fm[3], ";
+       $retval .= "is $fm[2]. ";
+       $retval .= "Version: \002$fm[1]\002, ";
+#      $retval .= "Development: \002$fm[2]\002. ";
+       $retval .= $fm[4];
+### ???
+#      $retval .= " deb: ".$fm[3] if ($fm[3] ne ""); # 'deb'.
 
        return $retval;
     } else {
@@ -182,18 +184,72 @@ sub downloadIndex {
 
     ### lets get on with business.
     # set the last refresh time. fixes multiple spawn bug.
-    &::dbSet("freshmeat", "projectname_short", "_", "latest_version", time());
+    &::dbSet("freshmeat", 
+       { "projectname_short"   => "_" },
+       { "latest_version"      => time()
+         "desc_short"          => "" }
+    );
 
-    &::dbRaw("LOCK", "LOCK TABLES freshmeat WRITE");
+#    &::dbRaw("LOCK", "LOCK TABLES freshmeat WRITE");
     @cols      = &::dbGetColInfo("freshmeat");
 
     $locktime  = time();
-    # todo: prevent severe memory usage whilst opening this file!!!
-    $p->parse(*IN, ProtocolEncoding => 'ISO-8859-1');
+
+    # this mess is to not dump IN to memory.
+    $_ = <IN>;
+    $_ = <IN>;
+    $_ = <IN>;
+
+    my $str;
+    while (<IN>) {
+       chop;
+
+       $str .= $_;
+
+       next unless (/<\/project>/);
+
+       # XML::Parser's parse() doesn't like the following.
+       # but parsefile() does... why!
+       for ($str) {
+               s/&reg;/_/g;
+               s/&ocirc;//g;
+               s/&quot;//g;
+               s/&eacute;/e/g;
+               s/&agrave;/a/g;
+               s/&iacute;/i/g;
+               s/&shy;/_/g;    # ???
+               s/&acute;/a/g;
+               s/&raquo;/_/g;  # ???
+               s/&laquo;/_/g;  # ???
+               s/&copy;/[C]/g;
+               s/&deg;/deg/g;
+               s/&AElig;/A/g;
+               s/\cN//g;               # fucking openbsd morons.
+               s/&nbsp;/-/g;
+               s/&ouml;/o/g;
+               s/&para;//g;    # ???
+               s/&atilde;//g;
+               s/\cM/ /g;              # stupid windows morons
+               s/&sup2;/square/g;
+               s/&uuml;/?/g;
+               s/&micro;/u/g;
+               s/&aelig;/a/g;
+               s/&oslash;/o/g;
+               s/&eth;/e/g;
+               s/&szlig;//g;
+               s/&middot;//g;
+       }
+
+       if (0 and $str =~ s/\&(\S+?);//g) {
+           &::DEBUG("fm: sarred $1 to ''.");
+       }
+
+       $p->parse($str, ProtocolEncoding => 'ISO-8859-1');
+       $str = "";
+    }
     close IN;
 
-    &::DEBUG("FM: data ".scalar(@data) );
-    &::dbRaw("UNLOCK", "UNLOCK TABLES");
+#    &::dbRaw("UNLOCK", "UNLOCK TABLES");
 
     my $delta_time = &::timedelta($start_time);
     &::status(sprintf("Freshmeat: %.02f sec to complete.", $delta_time)) if ($delta_time > 0);
@@ -256,11 +312,12 @@ sub freshmeatAnnounce {
     return "Freshmeat update: ".join(" \002::\002 ", @new);
 }
 
+
 sub xml_text {
-    my($expat,$text) = @_;
-    return if ($text =~ /^\s+$/);
+    my ($e,$t) = @_;
+    return if ($t =~ /^\s*$/);
 
-    $string = $text;
+    $string = $t;
 }
 
 sub xml_end {
@@ -268,13 +325,13 @@ sub xml_end {
 
     $pkg{$text} = $string;
 
-    if ($expat->depth == 1) {
+    if ($expat->depth == 0) {
        for (my $j=0; $j<scalar @cols; $j++) {
            $data[$j] = $pkg{ $cols[$j] };
        }
        $i++;
 
-       &::dbSetRow("freshmeat", @data);
+       &::dbSetRow("freshmeat", [@data], "DELAY");
        undef @data;
        undef %pkg;
 
@@ -284,10 +341,13 @@ sub xml_end {
                &::Time2String( time() - $locktime ) );
            $locktime = time();
 
-           &::dbRaw("UNLOCK", "UNLOCK TABLES");
+           # I think the following leaks 120k of memory each time it's
+           # called... the wonders of libmysql-perl leaking!
+
+#          &::dbRaw("UNLOCK", "UNLOCK TABLES");
            ### another lame hack to "prevent" errors.
-           select(undef, undef, undef, 0.2);
-           &::dbRaw("LOCK", "LOCK TABLES freshmeat WRITE");
+#          select(undef, undef, undef, 0.2);
+#          &::dbRaw("LOCK", "LOCK TABLES freshmeat WRITE");
        }
     }
 }