X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2FModules%2FFactoids.pl;h=438471f5ca3ed4467704b9684d3f46baad727ef1;hb=f7cae48a17d6decd0a9bd997188271daa0a885b1;hp=1e231efa336ca941390870b73594f80168b9dfc1;hpb=b8fa03ddc7413bbf76158cd781ff35d16cd2a44f;p=infobot.git diff --git a/src/Modules/Factoids.pl b/src/Modules/Factoids.pl index 1e231ef..438471f 100644 --- a/src/Modules/Factoids.pl +++ b/src/Modules/Factoids.pl @@ -5,32 +5,43 @@ # Splitted: SQLExtras.pl # -if (&IsParam("useStrict")) { use strict; } +use strict; + +use vars qw($dbh $who); +use vars qw(%param); ### # Usage: &CmdFactInfo($faqtoid, $query); sub CmdFactInfo { my ($faqtoid, $query) = (lc $_[0], $_[1]); + $faqtoid =~ s/^cmd:/CMD:/; my @array; my $string = ""; if ($faqtoid eq "") { &help("factinfo"); - return $noreply; + return; } - my $i = 0; - my %factinfo; - my @factinfo = &getFactInfo($faqtoid,"*"); - foreach ( &dbGetRowInfo("factoids") ) { - $factinfo{$_} = $factinfo[$i] || ''; - $i++; - } + my %factinfo = &sqlSelectRowHash("factoids", "*", + { factoid_key => $faqtoid } + ); # factoid does not exist. - if (scalar @factinfo <= 1) { + if (scalar (keys %factinfo) <= 1) { &performReply("there's no such factoid as \002$faqtoid\002"); - return $noreply; + return; + } + + # fix for problem observed by asuffield. + # why did it happen though? + if (!$factinfo{'factoid_value'}) { + &performReply("there's no such factoid as \002$faqtoid\002; deleted because we don't have factoid_value!"); + foreach (keys %factinfo) { + &DEBUG("factinfo{$_} => '$factinfo{$_}'."); + } +### &delFactoid($faqtoid); + return; } # created: @@ -43,7 +54,9 @@ sub CmdFactInfo { my $time = $factinfo{'created_time'}; if ($time) { if (time() - $time > 60*60*24*7) { - $string .= " at \037". scalar(localtime $time). "\037"; + my $days = int( (time() - $time)/60/60/24 ); + $string .= " at \037". scalar(gmtime $time). "\037" . + " ($days days)"; } else { $string .= " ".&Time2String(time() - $time)." ago"; } @@ -52,29 +65,23 @@ sub CmdFactInfo { push(@array,$string); } - # modified: -# if ($factinfo{'modified_by'}) { -# $string = "last modified"; -# -# my $time = $factinfo{'modified_time'}; -# if ($time) { -# if (time() - $time > 60*60*24*7) { -# $string .= " at \037". scalar(localtime $time). "\037"; -# } else { -# $string .= " ".&Time2String(time() - $time)." ago"; -# } -# } -# -# my @x; -# foreach (split ",", $factinfo{'modified_by'}) { -# /\!/; -# push(@x, $`); -# } -# $string .= "by ".&IJoin(@x); -# -# $i++; -# push(@array,$string); -# } + # modified: (TimRiker asks "why do you keep turning this off?) + if ($factinfo{'modified_by'}) { + $string = "last modified"; + + my $time = $factinfo{'modified_time'}; + if ($time) { + if (time() - $time > 60*60*24*7) { + $string .= " at \037". scalar(gmtime $time). "\037"; + } else { + $string .= " ".&Time2String(time() - $time)." ago "; + } + } + + $string .= " by ".(split ",", $factinfo{'modified_by'})[0]; + + push(@array,$string); + } # requested: if ($factinfo{'requested_by'}) { @@ -88,24 +95,24 @@ sub CmdFactInfo { $string .= "\002". $requested_count. "\002 ". &fixPlural("time", $requested_count); } - } - $string .= ", " if ($string ne ""); + my $requested_by = $factinfo{'requested_by'}; + $requested_by =~ /\!/; + $string .= ", last by $`"; - my $requested_by = $factinfo{'requested_by'}; - $requested_by =~ /\!/; - $string .= "last by $`"; - - my $requested_time = $factinfo{'requested_time'}; - if ($requested_time) { - if (time() - $requested_time > 60*60*24*7) { - $string .= " at \037". scalar(localtime $requested_time). "\037"; - } else { - $string .= ", ".&Time2String(time() - $requested_time)." ago"; + my $requested_time = $factinfo{'requested_time'}; + if ($requested_time) { + if (time() - $requested_time > 60*60*24*7) { + $string .= " at \037". scalar(localtime $requested_time). "\037"; + } else { + $string .= ", ".&Time2String(time() - $requested_time)." ago"; + } } + } else { + $string = "has not been requested yet"; } - push(@array,$string); + push(@array, $string); } # locked: @@ -119,22 +126,25 @@ sub CmdFactInfo { # factoid was inserted not through the bot. if (!scalar @array) { &performReply("no extra info on \002$faqtoid\002"); - return $noreply; + return; } - &performStrictReply("$factinfo{'factoid_key'} -- ". join("; ", @array) ."."); - return $noreply; + &pSReply("$factinfo{'factoid_key'} -- ". join("; ", @array) ."."); + return; } sub CmdFactStats { my ($type) = @_; if ($type =~ /^author$/i) { - my %hash = &dbGetCol("factoids", "factoid_key","created_by"); + my %hash = &sqlSelectColHash("factoids", + "factoid_key,created_by", undef, + "WHERE created_by IS NOT NULL" + ); my %author; - foreach (keys %hash) { - my $thisnuh = $hash{$_}; + foreach my $factoid (keys %hash) { + my $thisnuh = $hash{$factoid}; $thisnuh =~ /^(\S+)!\S+@\S+$/; $author{lc $1}++; @@ -154,22 +164,25 @@ sub CmdFactStats { my $count; my @list; foreach $count (sort { $b <=> $a } keys %count) { - my $author = join(", ", sort keys %{$count{$count}}); + my $author = join(", ", sort keys %{ $count{$count} }); push(@list, "$count by $author"); } my $prefix = "factoid statistics by author: "; return &formListReply(0, $prefix, @list); - } elsif ($type =~ /^broken$/i) { - &status("factstats(broken): starting..."); - my $start_time = &gettimeofday(); - my %data = &dbGetCol("factoids", "factoid_key","factoid_value"); + } elsif ($type =~ /^vandalism$/i) { + &status("factstats(vandalism): starting..."); + my $start_time = &timeget(); + my %data = &sqlSelectColHash("factoids", + "factoid_key,factoid_value", undef, + "WHERE factoid_value IS NOT NULL" + ); my @list; - my $delta_time = &gettimeofday() - $start_time; - &status(sprintf("factstats(broken): %.02f sec to retreive all factoids.", $delta_time)) if ($delta_time > 0); - $start_time = &gettimeofday(); + my $delta_time = &timedelta($start_time); + &status(sprintf("factstats(vandalism): %.02f sec to retreive all factoids.", $delta_time)) if ($delta_time > 0); + $start_time = &timeget(); # parse the factoids. foreach (keys %data) { @@ -179,8 +192,73 @@ sub CmdFactStats { } } - $delta_time = &gettimeofday() - $start_time; - &status(sprintf("factstats(broken): %.02f sec to complete.", $delta_time)) if ($delta_time > 0); + $delta_time = &timedelta($start_time); + &status(sprintf("factstats(vandalism): %.02f sec to complete.", $delta_time)) if ($delta_time > 0); + + # bail out on no results. + if (scalar @list == 0) { + return 'no vandalised factoids... wooohoo.'; + } + + # parse the results. + my $prefix = "Vandalised factoid "; + return &formListReply(1, $prefix, @list); + + } elsif ($type =~ /^total$/i) { + &status("factstats(total): starting..."); + my $start_time = &timeget(); + my @list; + my $str; + my($i,$j); + my %hash; + + ### lets do it. + # total factoids requests. + $i = &sumKey("factoids", "requested_count"); + push(@list, "total requests - $i"); + + # total factoids modified. + $str = &countKeys("factoids", "modified_by"); + push(@list, "total modified - $str"); + + # total factoids modified. + $j = &countKeys("factoids", "requested_count"); + $str = &countKeys("factoids", "factoid_key"); + push(@list, "total non-requested - ".($str - $i)); + + # average request/factoid. + # i/j == total(requested_count)/count(requested_count) + $str = sprintf("%.01f", $i/$j); + push(@list, "average requested per factoid - $str"); + + # total prepared for deletion. + $str = scalar( &searchTable("factoids", "factoid_key", "factoid_value", " #DEL") ); + push(@list, "total prepared for deletion - $str"); + + # total unique authors. + # todo: convert to sqlSelectColHash ? (or ColArray?) + foreach ( &sqlRawReturn("SELECT created_by FROM factoids WHERE created_by IS NOT NULL") ) { + /^(\S+)!/; + my $nick = lc $1; + $hash{$nick}++; + } + push(@list, "total unique authors - ".(scalar keys %hash) ); + undef %hash; + + # total unique requesters. + foreach ( &sqlRawReturn("SELECT requested_by FROM factoids WHERE requested_by IS NOT NULL") ) { + /^(\S+)!/; + my $nick = lc $1; + $hash{$nick}++; + } + push(@list, "total unique requesters - ".(scalar keys %hash) ); + undef %hash; + + ### end of "job". + + my $delta_time = &timedelta($start_time); + &status(sprintf("factstats(broken): %.02f sec to retreive all factoids.", $delta_time)) if ($delta_time > 0); + $start_time = &timeget(); # bail out on no results. if (scalar @list == 0) { @@ -188,7 +266,7 @@ sub CmdFactStats { } # parse the results. - my $prefix = "broken factoid "; + my $prefix = "General factoid statistics "; return &formListReply(1, $prefix, @list); } elsif ($type =~ /^deadredir$/i) { @@ -200,7 +278,7 @@ sub CmdFactStats { for (@list) { my $factoid = $_; my $val = &getFactInfo($factoid, "factoid_value"); - if ($val =~ /^ see( also)? (.*?)\.?$/i) { + if ($val =~ /^ ?see( also)? (.*?)\.?$/i) { my $redirf = lc $2; my $redir = &getFactInfo($redirf, "factoid_value"); next if (defined $redir); @@ -212,7 +290,7 @@ sub CmdFactStats { my @newlist; foreach $f (keys %redir) { - my @sublist = keys %{$redir{$f}}; + my @sublist = keys %{ $redir{$f} }; for (@sublist) { s/([\,\;]+)/\037$1\037/g; } @@ -225,19 +303,22 @@ sub CmdFactStats { return &formListReply(1, $prefix, @newlist); } elsif ($type =~ /^dup(licate|e)$/i) { - my $start_time = &gettimeofday(); - &status("factstats(dupe): starting..."); - my %hash = &dbGetCol("factoids", "factoid_key", "factoid_value", 1); + &status("factstats(dupe): starting..."); + my $start_time = &timeget(); + my %hash = &sqlSelectColHash("factoids", + "factoid_key,factoid_value", undef, + "WHERE factoid_value IS NOT NULL", 1 + ); + my $refs = 0; my @list; - my $refs = 0; my $v; foreach $v (keys %hash) { - my $count = scalar(keys %{$hash{$v}}); + my $count = scalar(keys %{ $hash{$v} }); next if ($count == 1); my @sublist; - foreach (keys %{$hash{$v}}) { + foreach (keys %{ $hash{$v} }) { if ($v =~ /^ see /i) { $refs++; next; @@ -257,8 +338,8 @@ sub CmdFactStats { } &status("factstats(dupe): (good) dupe refs: $refs."); - my $delta_time = &gettimeofday() - $start_time; - &status(sprintf("factstats(dupe): %.02f sec to complete", $delta_time)) if ($delta_time > 0); + my $delta_time = &timedelta($start_time); + &status(sprintf("factstats(dupe): %.02f sec to complete", $delta_time)) if ($delta_time > 0); # bail out on no results. if (scalar @list == 0) { @@ -269,6 +350,27 @@ sub CmdFactStats { my $prefix = "dupe factoid "; return &formListReply(1, $prefix, @list); + } elsif ($type =~ /^nullfactoids$/i) { + my $query = "SELECT factoid_key,factoid_value FROM factoids WHERE factoid_value=''"; + my $sth = $dbh->prepare($query); + &ERROR("factstats(null): => '$query'.") unless $sth->execute; + + my @list; + while (my @row = $sth->fetchrow_array) { + if ($row[1] ne "") { + &DEBUG("row[1] != NULL for $row[0]."); + next; + } + + &DEBUG("row[0] => '$row[0]'."); + push(@list, $row[0]); + } + $sth->finish; + + # parse the results. + my $prefix = "NULL factoids (not deleted yet) "; + return &formListReply(1, $prefix, @list); + } elsif ($type =~ /^(2|too)short$/i) { # Custom select statement. my $query = "SELECT factoid_key,factoid_value FROM factoids WHERE length(factoid_value) <= 40"; @@ -282,6 +384,11 @@ sub CmdFactStats { $match++ if ($val =~ /\s{3,}/); next unless ($match); + my $v = &getFactoid($val); + if (defined $v) { + &DEBUG("key $key => $val => $v"); + } + $key =~ s/\,/\037\,\037/g; push(@list, $key); } @@ -317,7 +424,10 @@ sub CmdFactStats { return &formListReply(1, $prefix, @list); } elsif ($type =~ /^locked$/i) { - my %hash = &dbGetCol("factoids", "factoid_key","locked_by"); + my %hash = &sqlSelectColhash("factoids", + "factoid_key,locked_by", undef, + "WHERE locked_by IS NOT NULL" + ); my @list = keys %hash; for (@list) { @@ -328,7 +438,10 @@ sub CmdFactStats { return &formListReply(0, $prefix, @list); } elsif ($type =~ /^new$/i) { - my %hash = &dbGetCol("factoids", "factoid_key","created_time"); + my %hash = &sqlSelectColHash("factoids", + "factoid_key,created_time", undef, + "WHERE created_time IS NOT NULL" + ); my %age; foreach (keys %hash) { @@ -345,7 +458,7 @@ sub CmdFactStats { my @list; foreach (sort {$a <=> $b} keys %age) { - push(@list, join(",", keys %{$age{$_}})); + push(@list, join(",", keys %{ $age{$_} })); } my $prefix = "new factoids in the last 24hours "; @@ -353,7 +466,7 @@ sub CmdFactStats { } elsif ($type =~ /^part(ial)?dupe$/i) { ### requires "custom" select statement... oh well... - my $start_time = &gettimeofday(); + my $start_time = &timeget(); # form length|key and key=length hash list. &status("factstats(partdupe): forming length hash list."); @@ -382,7 +495,7 @@ sub CmdFactStats { my @sublist; my $length; foreach $length (@length) { - foreach (keys %{$length{$length}}) { + foreach (keys %{ $length{$length} }) { if ($key{$_} =~ /^$val/i) { s/([\,\;]+)/\037$1\037/g; s/( and|and )/\037$1\037/g; @@ -393,8 +506,8 @@ sub CmdFactStats { push(@list, join(" ,",@sublist)) if (scalar @sublist); } - my $delta_time = sprintf("%.02fs", &gettimeofday() - $start_time); - &status("factstats(partdupe): $delta_time sec to complete.") if ($delta_time > 0); + my $delta_time = sprintf("%.02fs", &timedelta($start_time) ); + &status("factstats(partdupe): $delta_time sec to complete.") if ($delta_time > 0); # bail out on no results. if (scalar @list == 0) { @@ -406,7 +519,10 @@ sub CmdFactStats { return &formListReply(1, $prefix, @list); } elsif ($type =~ /^profanity$/i) { - my %data = &dbGetCol("factoids", "factoid_key","factoid_value"); + my %data = &sqlSelectColHash("factoids", + "factoid_key,factoid_value", undef, + "WHERE factoid_value IS NOT NULL" + ); my @list; foreach (keys %data) { @@ -422,6 +538,7 @@ sub CmdFactStats { "factoid_value", "^ see "); my %redir; my $f; + my $dangling = 0; for (@list) { my $factoid = $_; @@ -432,14 +549,15 @@ sub CmdFactStats { if (defined $redirval) { $redir{$redir}{$factoid} = 1; } else { - &WARN("factstats(redir): '$factoid' has loose link => '$redir'."); + &DEBUG("factstats(redir): '$factoid' has loose link => '$redir'."); + $dangling++; } } } my @newlist; foreach $f (keys %redir) { - my @sublist = keys %{$redir{$f}}; + my @sublist = keys %{ $redir{$f} }; for (@sublist) { s/([\,\;]+)/\037$1\037/g; } @@ -448,11 +566,14 @@ sub CmdFactStats { } # parse the results. - my $prefix = "Redirections in factoids "; + my $prefix = "Redirections in factoids, $dangling dangling "; return &formListReply(1, $prefix, @newlist); } elsif ($type =~ /^request(ed)?$/i) { - my %hash = &dbGetCol("factoids", "factoid_key", "requested_count",1); + my %hash = &sqlSelectColHash("factoids", + "factoid_key,requested_count", undef, + "WHERE requested_count IS NOT NULL", 1 + ); if (!scalar keys %hash) { return 'sorry, no factoids have been questioned.'; @@ -460,21 +581,47 @@ sub CmdFactStats { my $count; my @list; + my $total = 0; foreach $count (sort {$b <=> $a} keys %hash) { - my @faqtoids = sort keys %{$hash{$count}}; + my @faqtoids = sort keys %{ $hash{$count} }; for (@faqtoids) { s/([\,\;]+)/\037$1\037/g; } + $total += $count * scalar(@faqtoids); push(@list, "$count - ". join(", ", @faqtoids)); } + unshift(@list, "\037$total - TOTAL\037"); my $prefix = "factoid statistics on $type "; return &formListReply(0, $prefix, @list); + } elsif ($type =~ /^reqrate$/i) { + my %hash = &sqlSelectColHash("factoids", + "factoid_key,(unix_timestamp() - created_time)/requested_count as rate", undef, + "WHERE requested_by IS NOT NULL and created_time IS NOT NULL ORDER BY rate LIMIT 15", 1 + ); + + my $rate; + my @list; + my $total = 0; + my $users = 0; + foreach $rate (sort { $b <=> $a } keys %hash) { + my $f = join(", ", sort keys %{ $hash{$rate} }); + my $str = "$f - ".&Time2String($rate); + $str =~ s/\002//g; + push(@list, $str); + } + + my $prefix = "Rank of top factoid rate (time/req): "; + return &formListReply(0, $prefix, @list); + } elsif ($type =~ /^requesters?$/i) { - my %hash = &dbGetCol("factoids", "factoid_key","requested_by"); + my %hash = &sqlSelectColHash("factoids", + "factoid_key,requested_by", undef, + "WHERE requested_by IS NOT NULL" + ); my %requester; foreach (keys %hash) { @@ -491,16 +638,23 @@ sub CmdFactStats { # work-around. my %count; foreach (keys %requester) { - $count{$requester{$_}}{$_} = 1; + $count{ $requester{$_} }{$_} = 1; } undef %requester; my $count; my @list; + my $total = 0; + my $users = 0; foreach $count (sort { $b <=> $a } keys %count) { - my $requester = join(", ", sort keys %{$count{$count}}); + my $requester = join(", ", sort keys %{ $count{$count} }); + $total += $count * scalar(keys %{ $count{$count} }); + $users += scalar(keys %{ $count{$count} }); push(@list, "$count by $requester"); } + unshift(@list, "\037$total TOTAL REQUESTS; $users UNIQUE REQUESTERS\037"); + # should not the above value be the same as collected by + # 'requested'? soemthing weird is going on! my $prefix = "rank of top factoid requesters: "; return &formListReply(0, $prefix, @list); @@ -516,21 +670,22 @@ sub CmdFactStats { for (@list) { my $factoid = $_; my $val = &getFactInfo($factoid, "factoid_value"); - if ($val =~ /^see( also)? (.*?)\.?$/i) { - my $redirf = lc $2; - my $redir = &getFactInfo($redirf, "factoid_value"); - if ($redirf =~ /^\Q$factoid\W$/i) { - &delFactoid($factoid); - $loop{$factoid} = 1; - } + next unless ($val =~ /^see( also)? (.*?)\.?$/i); - if (defined $redir) { # good. - &setFactInfo($factoid,"factoid_value"," see $redir"); - $fixed++; - } else { - push(@newlist, $redirf); - } + my $redirf = lc $2; + my $redir = &getFactInfo($redirf, "factoid_value"); + + if ($redirf =~ /^\Q$factoid\W$/i) { + &delFactoid($factoid); + $loop{$factoid} = 1; + } + + if (defined $redir) { # good. + &setFactInfo($factoid,"factoid_value"," see $redir"); + $fixed++; + } else { + push(@newlist, $redirf); } } @@ -544,6 +699,7 @@ sub CmdFactStats { } elsif ($type =~ /^(2|too)long$/i) { my @list; + my $query; # factoid_key. $query = "SELECT factoid_key FROM factoids WHERE length(factoid_key) >= $param{'maxKeySize'}"; @@ -552,14 +708,16 @@ sub CmdFactStats { while (my @row = $sth->fetchrow_array) { push(@list,$row[0]); } + $sth->finish; # factoid_value. - my $query = "SELECT factoid_key,factoid_value FROM factoids WHERE length(factoid_value) >= $param{'maxDataSize'}"; + $query = "SELECT factoid_key,factoid_value FROM factoids WHERE length(factoid_value) >= $param{'maxDataSize'}"; $sth = $dbh->prepare($query); $sth->execute; while (my @row = $sth->fetchrow_array) { push(@list,sprintf("\002%s\002 - %s", length($row[1]), $row[0])); } + $sth->finish; if (scalar @list == 0) { return "good. no factoids exceed length."; @@ -570,14 +728,10 @@ sub CmdFactStats { return &formListReply(1, $prefix, @list); } elsif ($type =~ /^unrequest(ed)?$/i) { - my @list = &dbRawReturn("SELECT factoid_key FROM factoids WHERE requested_count IS NULL"); + # todo: use sqlSelect() + my ($count) = &sqlRawReturn("SELECT COUNT(*) FROM factoids WHERE requested_count = '0'"); - for (@list) { - s/([\,\;]+)/\037$1\037/g; - } - - my $prefix = "Unrequested factoids "; - return &formListReply(0, $prefix, @list); + return "Unrequested factoids: $count"; } return "error: invalid type => '$type'.";