2 # Factoids.pl: Helpers for generating factoids statistics.
4 # Version: v0.1 (20000514)
5 # Splitted: SQLExtras.pl
10 use vars qw($dbh $who);
14 # Usage: &CmdFactInfo($faqtoid, $query);
16 my ($faqtoid, $query) = (lc $_[0], $_[1]);
17 $faqtoid =~ s/^cmd:/CMD:/;
26 my %factinfo = &sqlSelectRowHash("factoids", "*",
27 { factoid_key => $faqtoid }
30 # factoid does not exist.
31 if (scalar (keys %factinfo) <= 1) {
32 &performReply("there's no such factoid as \002$faqtoid\002");
36 # fix for problem observed by asuffield.
37 # why did it happen though?
38 if (!$factinfo{'factoid_value'}) {
39 &performReply("there's no such factoid as \002$faqtoid\002; deleted because we don't have factoid_value!");
40 foreach (keys %factinfo) {
41 &DEBUG("factinfo{$_} => '$factinfo{$_}'.");
43 ### &delFactoid($faqtoid);
48 if ($factinfo{'created_by'}) {
50 $factinfo{'created_by'} =~ s/\!/ </;
51 $factinfo{'created_by'} .= ">";
52 $string = "created by $factinfo{'created_by'}";
54 my $time = $factinfo{'created_time'};
56 if (time() - $time > 60*60*24*7) {
57 my $days = int( (time() - $time)/60/60/24 );
58 $string .= " at \037". scalar(gmtime $time). "\037" .
61 $string .= " ".&Time2String(time() - $time)." ago";
68 # modified: (TimRiker asks "why do you keep turning this off?)
69 if ($factinfo{'modified_by'}) {
70 $string = "last modified";
72 my $time = $factinfo{'modified_time'};
74 if (time() - $time > 60*60*24*7) {
75 $string .= " at \037". scalar(gmtime $time). "\037";
77 $string .= " ".&Time2String(time() - $time)." ago ";
81 $string .= " by ".(split ",", $factinfo{'modified_by'})[0];
87 if ($factinfo{'requested_by'}) {
88 my $requested_count = $factinfo{'requested_count'};
90 if ($requested_count) {
91 $string = "it has been requested ";
92 if ($requested_count == 1) {
93 $string .= "\002once\002";
95 $string .= "\002". $requested_count. "\002 ".
96 &fixPlural("time", $requested_count);
99 my $requested_by = $factinfo{'requested_by'};
100 $requested_by =~ /\!/;
101 $string .= ", last by $`";
103 my $requested_time = $factinfo{'requested_time'};
104 if ($requested_time) {
105 if (time() - $requested_time > 60*60*24*7) {
106 $string .= " at \037". scalar(localtime $requested_time). "\037";
108 $string .= ", ".&Time2String(time() - $requested_time)." ago";
112 $string = "has not been requested yet";
115 push(@array, $string);
119 if ($factinfo{'locked_by'}) {
120 $factinfo{'locked_by'} =~ /\!/;
121 $string = "it has been locked by $`";
123 push(@array, $string);
126 # factoid was inserted not through the bot.
127 if (!scalar @array) {
128 &performReply("no extra info on \002$faqtoid\002");
132 &pSReply("$factinfo{'factoid_key'} -- ". join("; ", @array) .".");
139 if ($type =~ /^author$/i) {
140 my %hash = &sqlSelectColHash("factoids",
141 "factoid_key,created_by", { },
142 "created_by IS NOT NULL"
146 foreach my $factoid (keys %hash) {
147 my $thisnuh = $hash{$factoid};
149 $thisnuh =~ /^(\S+)!\S+@\S+$/;
153 if (!scalar keys %author) {
154 return 'sorry, no factoids with created_by field.';
159 foreach (keys %author) {
160 $count{ $author{$_} }{$_} = 1;
166 foreach $count (sort { $b <=> $a } keys %count) {
167 my $author = join(", ", sort keys %{ $count{$count} });
168 push(@list, "$count by $author");
171 my $prefix = "factoid statistics by author: ";
172 return &formListReply(0, $prefix, @list);
174 } elsif ($type =~ /^vandalism$/i) {
175 &status("factstats(vandalism): starting...");
176 my $start_time = &timeget();
177 my %data = &sqlSelectColHash("factoids",
178 "factoid_key,factoid_value", { },
179 "factoid_value IS NOT NULL"
183 my $delta_time = &timedelta($start_time);
184 &status(sprintf("factstats(vandalism): %.02f sec to retreive all factoids.", $delta_time)) if ($delta_time > 0);
185 $start_time = &timeget();
187 # parse the factoids.
188 foreach (keys %data) {
189 if (&validFactoid($_, $data{$_}) == 0) {
190 s/([\,\;]+)/\037$1\037/g; # highlight chars.
191 push(@list, $_); # push it.
195 $delta_time = &timedelta($start_time);
196 &status(sprintf("factstats(vandalism): %.02f sec to complete.", $delta_time)) if ($delta_time > 0);
198 # bail out on no results.
199 if (scalar @list == 0) {
200 return 'no vandalised factoids... wooohoo.';
204 my $prefix = "Vandalised factoid ";
205 return &formListReply(1, $prefix, @list);
207 } elsif ($type =~ /^total$/i) {
208 &status("factstats(total): starting...");
209 my $start_time = &timeget();
216 # total factoids requests.
217 $i = &sumKey("factoids", "requested_count");
218 push(@list, "total requests - $i");
220 # total factoids modified.
221 $str = &countKeys("factoids", "modified_by");
222 push(@list, "total modified - $str");
224 # total factoids modified.
225 $j = &countKeys("factoids", "requested_count");
226 $str = &countKeys("factoids", "factoid_key");
227 push(@list, "total non-requested - ".($str - $i));
229 # average request/factoid.
230 # i/j == total(requested_count)/count(requested_count)
231 $str = sprintf("%.01f", $i/$j);
232 push(@list, "average requested per factoid - $str");
234 # total prepared for deletion.
235 $str = scalar( &searchTable("factoids", "factoid_key", "factoid_value", " #DEL") );
236 push(@list, "total prepared for deletion - $str");
238 # total unique authors.
239 # todo: convert to sqlSelectColHash ? (or ColArray?)
240 foreach ( &sqlRawReturn("SELECT created_by FROM factoids WHERE created_by IS NOT NULL") ) {
245 push(@list, "total unique authors - ".(scalar keys %hash) );
248 # total unique requesters.
249 foreach ( &sqlRawReturn("SELECT requested_by FROM factoids WHERE requested_by IS NOT NULL") ) {
254 push(@list, "total unique requesters - ".(scalar keys %hash) );
259 my $delta_time = &timedelta($start_time);
260 &status(sprintf("factstats(broken): %.02f sec to retreive all factoids.", $delta_time)) if ($delta_time > 0);
261 $start_time = &timeget();
263 # bail out on no results.
264 if (scalar @list == 0) {
265 return 'no broken factoids... wooohoo.';
269 my $prefix = "General factoid statistics ";
270 return &formListReply(1, $prefix, @list);
272 } elsif ($type =~ /^deadredir$/i) {
273 my @list = &searchTable("factoids", "factoid_key",
274 "factoid_value", "^<REPLY> see ");
280 my $val = &getFactInfo($factoid, "factoid_value");
281 if ($val =~ /^<REPLY> ?see( also)? (.*?)\.?$/i) {
283 my $redir = &getFactInfo($redirf, "factoid_value");
284 next if (defined $redir);
285 next if (length $val > 50);
287 $redir{$redirf}{$factoid} = 1;
292 foreach $f (keys %redir) {
293 my @sublist = keys %{ $redir{$f} };
295 s/([\,\;]+)/\037$1\037/g;
298 push(@newlist, join(', ', @sublist)." => $f");
302 my $prefix = "Loose link (dead) redirections in factoids ";
303 return &formListReply(1, $prefix, @newlist);
305 } elsif ($type =~ /^dup(licate|e)$/i) {
306 &status("factstats(dupe): starting...");
307 my $start_time = &timeget();
308 my %hash = &sqlSelectColHash("factoids",
309 "factoid_key,factoid_value", { },
310 "factoid_value IS NOT NULL", 1
316 foreach $v (keys %hash) {
317 my $count = scalar(keys %{ $hash{$v} });
318 next if ($count == 1);
321 foreach (keys %{ $hash{$v} }) {
322 if ($v =~ /^<REPLY> see /i) {
327 s/([\,\;]+)/\037$1\037/g;
329 &WARN("dupe: _ = NULL. should never happen!.");
335 next unless (scalar @sublist);
337 push(@list, join(", ", @sublist));
340 &status("factstats(dupe): (good) dupe refs: $refs.");
341 my $delta_time = &timedelta($start_time);
342 &status(sprintf("factstats(dupe): %.02f sec to complete", $delta_time)) if ($delta_time > 0);
344 # bail out on no results.
345 if (scalar @list == 0) {
346 return "no duplicate factoids... woohoo.";
350 my $prefix = "dupe factoid ";
351 return &formListReply(1, $prefix, @list);
353 } elsif ($type =~ /^nullfactoids$/i) {
354 my $query = "SELECT factoid_key,factoid_value FROM factoids WHERE factoid_value=''";
355 my $sth = $dbh->prepare($query);
356 &ERROR("factstats(null): => '$query'.") unless $sth->execute;
359 while (my @row = $sth->fetchrow_array) {
361 &DEBUG("row[1] != NULL for $row[0].");
365 &DEBUG("row[0] => '$row[0]'.");
366 push(@list, $row[0]);
371 my $prefix = "NULL factoids (not deleted yet) ";
372 return &formListReply(1, $prefix, @list);
374 } elsif ($type =~ /^(2|too)short$/i) {
375 # Custom select statement.
376 my $query = "SELECT factoid_key,factoid_value FROM factoids WHERE length(factoid_value) <= 40";
377 my $sth = $dbh->prepare($query);
378 &ERROR("factstats(lame): => '$query'.") unless $sth->execute;
381 while (my @row = $sth->fetchrow_array) {
382 my($key,$val) = ($row[0], $row[1]);
384 $match++ if ($val =~ /\s{3,}/);
385 next unless ($match);
387 my $v = &getFactoid($val);
389 &DEBUG("key $key => $val => $v");
392 $key =~ s/\,/\037\,\037/g;
398 my $prefix = "Lame factoids ";
399 return &formListReply(1, $prefix, @list);
401 } elsif ($type =~ /^listfix$/i) {
402 # Custom select statement.
403 my $query = "SELECT factoid_key,factoid_value FROM factoids";
404 my $sth = $dbh->prepare($query);
405 &ERROR("factstats(listfix): => '$query'.") unless $sth->execute;
408 while (my @row = $sth->fetchrow_array) {
409 my($key,$val) = ($row[0], $row[1]);
411 $match++ if ($val =~ /\S+,? or \S+,? or \S+,? or \S+,?/);
412 next unless ($match);
414 $key =~ s/\,/\037\,\037/g;
416 $val =~ s/,? or /, /g;
417 &DEBUG("fixed: => $val.");
418 &setFactInfo($key,"factoid_value", $val);
423 my $prefix = "Inefficient lists fixed ";
424 return &formListReply(1, $prefix, @list);
426 } elsif ($type =~ /^locked$/i) {
427 my %hash = &sqlSelectColhash("factoids",
428 "factoid_key,locked_by", { },
429 "locked_by IS NOT NULL"
431 my @list = keys %hash;
434 s/([\,\;]+)/\037$1\037/g;
437 my $prefix = "factoid statistics on $type ";
438 return &formListReply(0, $prefix, @list);
440 } elsif ($type =~ /^new$/i) {
441 my %hash = &sqlSelectColHash("factoids",
442 "factoid_key,created_time", { },
443 "created_time IS NOT NULL"
447 foreach (keys %hash) {
448 my $created_time = $hash{$_};
449 my $delta_time = time() - $created_time;
450 next if ($delta_time >= 60*60*24);
452 $age{$delta_time}{$_} = 1;
455 if (scalar keys %age == 0) {
456 return "sorry, no new factoids.";
460 foreach (sort {$a <=> $b} keys %age) {
461 push(@list, join(",", keys %{ $age{$_} }));
464 my $prefix = "new factoids in the last 24hours ";
465 return &formListReply(0, $prefix, @list);
467 } elsif ($type =~ /^part(ial)?dupe$/i) {
468 ### requires "custom" select statement... oh well...
469 my $start_time = &timeget();
471 # form length|key and key=length hash list.
472 &status("factstats(partdupe): forming length hash list.");
473 my $query = "SELECT factoid_key,factoid_value,length(factoid_value) AS length FROM factoids WHERE length(factoid_value) >= 192 ORDER BY length";
474 my $sth = $dbh->prepare($query);
475 &ERROR("factstats(partdupe): => '$query'.") unless $sth->execute;
479 while (my @row = $sth->fetchrow_array) {
480 $length{$row[2]}{$row[0]} = 1; # length(value)|key.
481 $key{$row[0]} = $row[1]; # key=value.
485 &status("factstats(partdupe): total keys => '". scalar(@key) ."'.");
486 &status("factstats(partdupe): now deciphering data gathered");
488 my @length = sort { $a <=> $b } keys %length;
491 foreach $key (@key) {
492 shift @length if (length $key{$key} == $length[0]);
494 my $val = quotemeta $key{$key};
497 foreach $length (@length) {
498 foreach (keys %{ $length{$length} }) {
499 if ($key{$_} =~ /^$val/i) {
500 s/([\,\;]+)/\037$1\037/g;
501 s/( and|and )/\037$1\037/g;
502 push(@sublist,$key." and ".$_);
506 push(@list, join(" ,",@sublist)) if (scalar @sublist);
509 my $delta_time = sprintf("%.02fs", &timedelta($start_time) );
510 &status("factstats(partdupe): $delta_time sec to complete.") if ($delta_time > 0);
512 # bail out on no results.
513 if (scalar @list == 0) {
514 return "no initial partial duplicate factoids... woohoo.";
518 my $prefix = "initial partial dupe factoid ";
519 return &formListReply(1, $prefix, @list);
521 } elsif ($type =~ /^profanity$/i) {
522 my %data = &sqlSelectColHash("factoids",
523 "factoid_key,factoid_value", { },
524 "factoid_value IS NOT NULL"
528 foreach (keys %data) {
529 push(@list, $_) if (&hasProfanity($_." ".$data{$_}));
533 my $prefix = "Profanity in factoids ";
534 return &formListReply(1, $prefix, @list);
536 } elsif ($type =~ /^redir(ection)?$/i) {
537 my @list = &searchTable("factoids", "factoid_key",
538 "factoid_value", "^<REPLY> see ");
545 my $val = &getFactInfo($factoid, "factoid_value");
546 if ($val =~ /^<REPLY> see( also)? (.*?)\.?$/i) {
548 my $redirval = &getFactInfo($redir, "factoid_value");
549 if (defined $redirval) {
550 $redir{$redir}{$factoid} = 1;
552 &DEBUG("factstats(redir): '$factoid' has loose link => '$redir'.");
559 foreach $f (keys %redir) {
560 my @sublist = keys %{ $redir{$f} };
562 s/([\,\;]+)/\037$1\037/g;
565 push(@newlist, "$f => ". join(', ', @sublist));
569 my $prefix = "Redirections in factoids, $dangling dangling ";
570 return &formListReply(1, $prefix, @newlist);
572 } elsif ($type =~ /^request(ed)?$/i) {
573 my %hash = &sqlSelectColHash("factoids",
574 "factoid_key,requested_count", { },
575 "requested_count IS NOT NULL", 1
578 if (!scalar keys %hash) {
579 return 'sorry, no factoids have been questioned.';
585 foreach $count (sort {$b <=> $a} keys %hash) {
586 my @faqtoids = sort keys %{ $hash{$count} };
589 s/([\,\;]+)/\037$1\037/g;
591 $total += $count * scalar(@faqtoids);
593 push(@list, "$count - ". join(", ", @faqtoids));
595 unshift(@list, "\037$total - TOTAL\037");
597 my $prefix = "factoid statistics on $type ";
598 return &formListReply(0, $prefix, @list);
600 } elsif ($type =~ /^reqrate$/i) {
601 my %hash = &sqlSelectColHash("factoids",
602 "factoid_key,(unix_timestamp() - created_time)/requested_count as rate", { },
603 "requested_by IS NOT NULL and created_time IS NOT NULL ORDER BY rate LIMIT 15", 1
610 foreach $rate (sort { $b <=> $a } keys %hash) {
611 my $f = join(", ", sort keys %{ $hash{$rate} });
612 my $str = "$f - ".&Time2String($rate);
617 my $prefix = "Rank of top factoid rate (time/req): ";
618 return &formListReply(0, $prefix, @list);
620 } elsif ($type =~ /^requesters?$/i) {
621 my %hash = &sqlSelectColHash("factoids",
622 "factoid_key,requested_by", { },
623 "requested_by IS NOT NULL"
627 foreach (keys %hash) {
628 my $thisnuh = $hash{$_};
630 $thisnuh =~ /^(\S+)!\S+@\S+$/;
634 if (!scalar keys %requester) {
635 return 'sorry, no factoids with requested_by field.';
640 foreach (keys %requester) {
641 $count{ $requester{$_} }{$_} = 1;
649 foreach $count (sort { $b <=> $a } keys %count) {
650 my $requester = join(", ", sort keys %{ $count{$count} });
651 $total += $count * scalar(keys %{ $count{$count} });
652 $users += scalar(keys %{ $count{$count} });
653 push(@list, "$count by $requester");
655 unshift(@list, "\037$total TOTAL REQUESTS; $users UNIQUE REQUESTERS\037");
656 # should not the above value be the same as collected by
657 # 'requested'? soemthing weird is going on!
659 my $prefix = "rank of top factoid requesters: ";
660 return &formListReply(0, $prefix, @list);
662 } elsif ($type =~ /^seefix$/i) {
663 my @list = &searchTable("factoids", "factoid_key",
664 "factoid_value", "^see ");
672 my $val = &getFactInfo($factoid, "factoid_value");
674 next unless ($val =~ /^see( also)? (.*?)\.?$/i);
677 my $redir = &getFactInfo($redirf, "factoid_value");
679 if ($redirf =~ /^\Q$factoid\W$/i) {
680 &delFactoid($factoid);
684 if (defined $redir) { # good.
685 &setFactInfo($factoid,"factoid_value","<REPLY> see $redir");
688 push(@newlist, $redirf);
693 &msg($who, "Fixed $fixed factoids.");
694 &msg($who, "Self looped factoids removed: ".
695 sort(keys %loop) ) if (scalar keys %loop);
697 my $prefix = "Loose link (dead) redirections in factoids ";
698 return &formListReply(1, $prefix, @newlist);
700 } elsif ($type =~ /^(2|too)long$/i) {
705 $query = "SELECT factoid_key FROM factoids WHERE length(factoid_key) >= $param{'maxKeySize'}";
706 my $sth = $dbh->prepare($query);
708 while (my @row = $sth->fetchrow_array) {
714 $query = "SELECT factoid_key,factoid_value FROM factoids WHERE length(factoid_value) >= $param{'maxDataSize'}";
715 $sth = $dbh->prepare($query);
717 while (my @row = $sth->fetchrow_array) {
718 push(@list,sprintf("\002%s\002 - %s", length($row[1]), $row[0]));
722 if (scalar @list == 0) {
723 return "good. no factoids exceed length.";
727 my $prefix = "factoid key||value exceeding length ";
728 return &formListReply(1, $prefix, @list);
730 } elsif ($type =~ /^unrequest(ed)?$/i) {
731 # todo: use sqlSelect()
732 my ($count) = &sqlRawReturn("SELECT COUNT(*) FROM factoids WHERE requested_count = '0'");
734 return "Unrequested factoids: $count";
737 return "error: invalid type => '$type'.";
742 my @list = &searchTable("factoids","factoid_key", "created_by", "^$query!");
744 my $prefix = "factoid author list by '$query' ";
745 &performStrictReply( &formListReply(1, $prefix, @list) );