2 # Factoids.pl: Helpers for generating factoids statistics.
4 # Version: v0.1 (20000514)
5 # Splitted: SQLExtras.pl
10 use vars qw($dbh $who);
14 # Usage: &CmdFactInfo($faqtoid, $query);
16 my ($faqtoid, $query) = (lc $_[0], $_[1]);
25 my %factinfo = &dbGetColNiceHash("factoids", "*", "factoid_key=".&dbQuote($faqtoid));
27 # factoid does not exist.
28 if (scalar (keys %factinfo) <= 1) {
29 &performReply("there's no such factoid as \002$faqtoid\002");
33 # fix for problem observed by asuffield.
34 # why did it happen though?
35 if (!$factinfo{'factoid_value'}) {
36 &performReply("there's no such factoid as \002$faqtoid\002; deleted because we don't have factoid_value!");
37 foreach (keys %factinfo) {
38 &DEBUG("factinfo{$_} => '$factinfo{$_}'.");
40 ### &delFactoid($faqtoid);
45 if ($factinfo{'created_by'}) {
47 $factinfo{'created_by'} =~ s/\!/ </;
48 $factinfo{'created_by'} .= ">";
49 $string = "created by $factinfo{'created_by'}";
51 my $time = $factinfo{'created_time'};
53 if (time() - $time > 60*60*24*7) {
54 my $days = int( (time() - $time)/60/60/24 );
55 $string .= " at \037". scalar(localtime $time). "\037" .
58 $string .= " ".&Time2String(time() - $time)." ago";
66 if ($factinfo{'modified_by'}) {
67 $string = "last modified";
69 my $time = $factinfo{'modified_time'};
71 if (time() - $time > 60*60*24*7) {
72 $string .= " at \037". scalar(localtime $time). "\037";
74 $string .= " ".&Time2String(time() - $time)." ago ";
79 foreach (split ",", $factinfo{'modified_by'}) {
83 $string .= "by ".&IJoin(@x);
90 if ($factinfo{'requested_by'}) {
91 my $requested_count = $factinfo{'requested_count'};
93 if ($requested_count) {
94 $string = "it has been requested ";
95 if ($requested_count == 1) {
96 $string .= "\002once\002";
98 $string .= "\002". $requested_count. "\002 ".
99 &fixPlural("time", $requested_count);
103 $string .= ", " if ($string ne "");
105 my $requested_by = $factinfo{'requested_by'};
106 $requested_by =~ /\!/;
107 $string .= "last by $`";
109 my $requested_time = $factinfo{'requested_time'};
110 if ($requested_time) {
111 if (time() - $requested_time > 60*60*24*7) {
112 $string .= " at \037". scalar(localtime $requested_time). "\037";
114 $string .= ", ".&Time2String(time() - $requested_time)." ago";
118 push(@array,$string);
122 if ($factinfo{'locked_by'}) {
123 $factinfo{'locked_by'} =~ /\!/;
124 $string = "it has been locked by $`";
126 push(@array, $string);
129 # factoid was inserted not through the bot.
130 if (!scalar @array) {
131 &performReply("no extra info on \002$faqtoid\002");
135 &performStrictReply("$factinfo{'factoid_key'} -- ". join("; ", @array) .".");
142 if ($type =~ /^author$/i) {
143 my %hash = &dbGetCol("factoids", "factoid_key,created_by", "created_by IS NOT NULL");
146 foreach (keys %hash) {
147 my $thisnuh = $hash{$_};
149 $thisnuh =~ /^(\S+)!\S+@\S+$/;
153 if (!scalar keys %author) {
154 return 'sorry, no factoids with created_by field.';
159 foreach (keys %author) {
160 $count{ $author{$_} }{$_} = 1;
166 foreach $count (sort { $b <=> $a } keys %count) {
167 my $author = join(", ", sort keys %{ $count{$count} });
168 push(@list, "$count by $author");
171 my $prefix = "factoid statistics by author: ";
172 return &formListReply(0, $prefix, @list);
174 } elsif ($type =~ /^vandalism$/i) {
175 &status("factstats(vandalism): starting...");
176 my $start_time = &timeget();
177 my %data = &dbGetCol("factoids", "factoid_key,factoid_value", "factoid_value IS NOT NULL");
180 my $delta_time = &timedelta($start_time);
181 &status(sprintf("factstats(vandalismbroken): %.02f sec to retreive all factoids.", $delta_time)) if ($delta_time > 0);
182 $start_time = &timeget();
184 # parse the factoids.
185 foreach (keys %data) {
186 if (&validFactoid($_, $data{$_}) == 0) {
187 s/([\,\;]+)/\037$1\037/g; # highlight chars.
188 push(@list, $_); # push it.
192 $delta_time = &timedelta($start_time);
193 &status(sprintf("factstats(vandalism): %.02f sec to complete.", $delta_time)) if ($delta_time > 0);
195 # bail out on no results.
196 if (scalar @list == 0) {
197 return 'no vandalised factoids... wooohoo.';
201 my $prefix = "Vandalised factoid ";
202 return &formListReply(1, $prefix, @list);
204 } elsif ($type =~ /^total$/i) {
205 &status("factstats(total): starting...");
206 my $start_time = &timeget();
213 # total factoids requests.
214 $i = &sumKey("factoids", "requested_count");
215 push(@list, "total requests - $i");
217 # total factoids modified.
218 $str = &countKeys("factoids", "modified_by");
219 push(@list, "total modified - $str");
221 # total factoids modified.
222 $j = &countKeys("factoids", "requested_count");
223 $str = &countKeys("factoids", "factoid_key");
224 push(@list, "total non-requested - ".($str - $i));
226 # average request/factoid.
227 # i/j == total(requested_count)/count(requested_count)
228 $str = sprintf("%.01f", $i/$j);
229 push(@list, "average requested per factoid - $str");
231 # total prepared for deletion.
232 $str = scalar( &searchTable("factoids", "factoid_key", "factoid_value", " #DEL") );
233 push(@list, "total prepared for deletion - $str");
235 # total unique authors.
236 foreach ( &dbRawReturn("SELECT created_by FROM factoids WHERE created_by IS NOT NULL") ) {
241 push(@list, "total unique authors - ".(scalar keys %hash) );
244 # total unique requesters.
245 foreach ( &dbRawReturn("SELECT requested_by FROM factoids WHERE requested_by IS NOT NULL") ) {
250 push(@list, "total unique requesters - ".(scalar keys %hash) );
255 my $delta_time = &timedelta($start_time);
256 &status(sprintf("factstats(broken): %.02f sec to retreive all factoids.", $delta_time)) if ($delta_time > 0);
257 $start_time = &timeget();
259 # bail out on no results.
260 if (scalar @list == 0) {
261 return 'no broken factoids... wooohoo.';
265 my $prefix = "General factoid stiatistics ";
266 return &formListReply(1, $prefix, @list);
268 } elsif ($type =~ /^deadredir$/i) {
269 my @list = &searchTable("factoids", "factoid_key",
270 "factoid_value", "^<REPLY> see ");
276 my $val = &getFactInfo($factoid, "factoid_value");
277 if ($val =~ /^<REPLY> ?see( also)? (.*?)\.?$/i) {
279 my $redir = &getFactInfo($redirf, "factoid_value");
280 next if (defined $redir);
281 next if (length $val > 50);
283 $redir{$redirf}{$factoid} = 1;
288 foreach $f (keys %redir) {
289 my @sublist = keys %{ $redir{$f} };
291 s/([\,\;]+)/\037$1\037/g;
294 push(@newlist, join(', ', @sublist)." => $f");
298 my $prefix = "Loose link (dead) redirections in factoids ";
299 return &formListReply(1, $prefix, @newlist);
301 } elsif ($type =~ /^dup(licate|e)$/i) {
302 &status("factstats(dupe): starting...");
303 my $start_time = &timeget();
304 my %hash = &dbGetCol("factoids", "factoid_key,factoid_value", "factoid_value IS NOT NULL", 1);
309 foreach $v (keys %hash) {
310 my $count = scalar(keys %{ $hash{$v} });
311 next if ($count == 1);
314 foreach (keys %{ $hash{$v} }) {
315 if ($v =~ /^<REPLY> see /i) {
320 s/([\,\;]+)/\037$1\037/g;
322 &WARN("dupe: _ = NULL. should never happen!.");
328 next unless (scalar @sublist);
330 push(@list, join(", ", @sublist));
333 &status("factstats(dupe): (good) dupe refs: $refs.");
334 my $delta_time = &timedelta($start_time);
335 &status(sprintf("factstats(dupe): %.02f sec to complete", $delta_time)) if ($delta_time > 0);
337 # bail out on no results.
338 if (scalar @list == 0) {
339 return "no duplicate factoids... woohoo.";
343 my $prefix = "dupe factoid ";
344 return &formListReply(1, $prefix, @list);
346 } elsif ($type =~ /^nullfactoids$/i) {
347 my $query = "SELECT factoid_key,factoid_value FROM factoids WHERE factoid_value=''";
348 my $sth = $dbh->prepare($query);
349 &ERROR("factstats(null): => '$query'.") unless $sth->execute;
352 while (my @row = $sth->fetchrow_array) {
354 &DEBUG("row[1] != NULL for $row[0].");
358 &DEBUG("row[0] => '$row[0]'.");
359 push(@list, $row[0]);
364 my $prefix = "NULL factoids (not deleted yet) ";
365 return &formListReply(1, $prefix, @list);
367 } elsif ($type =~ /^(2|too)short$/i) {
368 # Custom select statement.
369 my $query = "SELECT factoid_key,factoid_value FROM factoids WHERE length(factoid_value) <= 40";
370 my $sth = $dbh->prepare($query);
371 &ERROR("factstats(lame): => '$query'.") unless $sth->execute;
374 while (my @row = $sth->fetchrow_array) {
375 my($key,$val) = ($row[0], $row[1]);
377 $match++ if ($val =~ /\s{3,}/);
378 next unless ($match);
380 my $v = &getFactoid($val);
382 &DEBUG("key $key => $val => $v");
385 $key =~ s/\,/\037\,\037/g;
391 my $prefix = "Lame factoids ";
392 return &formListReply(1, $prefix, @list);
394 } elsif ($type =~ /^listfix$/i) {
395 # Custom select statement.
396 my $query = "SELECT factoid_key,factoid_value FROM factoids";
397 my $sth = $dbh->prepare($query);
398 &ERROR("factstats(listfix): => '$query'.") unless $sth->execute;
401 while (my @row = $sth->fetchrow_array) {
402 my($key,$val) = ($row[0], $row[1]);
404 $match++ if ($val =~ /\S+,? or \S+,? or \S+,? or \S+,?/);
405 next unless ($match);
407 $key =~ s/\,/\037\,\037/g;
409 $val =~ s/,? or /, /g;
410 &DEBUG("fixed: => $val.");
411 &setFactInfo($key,"factoid_value", $val);
416 my $prefix = "Inefficient lists fixed ";
417 return &formListReply(1, $prefix, @list);
419 } elsif ($type =~ /^locked$/i) {
420 my %hash = &dbGetCol("factoids", "factoid_key,locked_by", "locked_by IS NOT NULL");
421 my @list = keys %hash;
424 s/([\,\;]+)/\037$1\037/g;
427 my $prefix = "factoid statistics on $type ";
428 return &formListReply(0, $prefix, @list);
430 } elsif ($type =~ /^new$/i) {
431 my %hash = &dbGetCol("factoids", "factoid_key,created_time", "created_time IS NOT NULL");
434 foreach (keys %hash) {
435 my $created_time = $hash{$_};
436 my $delta_time = time() - $created_time;
437 next if ($delta_time >= 60*60*24);
439 $age{$delta_time}{$_} = 1;
442 if (scalar keys %age == 0) {
443 return "sorry, no new factoids.";
447 foreach (sort {$a <=> $b} keys %age) {
448 push(@list, join(",", keys %{ $age{$_} }));
451 my $prefix = "new factoids in the last 24hours ";
452 return &formListReply(0, $prefix, @list);
454 } elsif ($type =~ /^part(ial)?dupe$/i) {
455 ### requires "custom" select statement... oh well...
456 my $start_time = &timeget();
458 # form length|key and key=length hash list.
459 &status("factstats(partdupe): forming length hash list.");
460 my $query = "SELECT factoid_key,factoid_value,length(factoid_value) AS length FROM factoids WHERE length(factoid_value) >= 192 ORDER BY length";
461 my $sth = $dbh->prepare($query);
462 &ERROR("factstats(partdupe): => '$query'.") unless $sth->execute;
466 while (my @row = $sth->fetchrow_array) {
467 $length{$row[2]}{$row[0]} = 1; # length(value)|key.
468 $key{$row[0]} = $row[1]; # key=value.
472 &status("factstats(partdupe): total keys => '". scalar(@key) ."'.");
473 &status("factstats(partdupe): now deciphering data gathered");
475 my @length = sort { $a <=> $b } keys %length;
478 foreach $key (@key) {
479 shift @length if (length $key{$key} == $length[0]);
481 my $val = quotemeta $key{$key};
484 foreach $length (@length) {
485 foreach (keys %{ $length{$length} }) {
486 if ($key{$_} =~ /^$val/i) {
487 s/([\,\;]+)/\037$1\037/g;
488 s/( and|and )/\037$1\037/g;
489 push(@sublist,$key." and ".$_);
493 push(@list, join(" ,",@sublist)) if (scalar @sublist);
496 my $delta_time = sprintf("%.02fs", &timedelta($start_time) );
497 &status("factstats(partdupe): $delta_time sec to complete.") if ($delta_time > 0);
499 # bail out on no results.
500 if (scalar @list == 0) {
501 return "no initial partial duplicate factoids... woohoo.";
505 my $prefix = "initial partial dupe factoid ";
506 return &formListReply(1, $prefix, @list);
508 } elsif ($type =~ /^profanity$/i) {
509 my %data = &dbGetCol("factoids", "factoid_key,factoid_value", "factoid_value IS NOT NULL");
512 foreach (keys %data) {
513 push(@list, $_) if (&hasProfanity($_." ".$data{$_}));
517 my $prefix = "Profanity in factoids ";
518 return &formListReply(1, $prefix, @list);
520 } elsif ($type =~ /^redir(ection)?$/i) {
521 my @list = &searchTable("factoids", "factoid_key",
522 "factoid_value", "^<REPLY> see ");
528 my $val = &getFactInfo($factoid, "factoid_value");
529 if ($val =~ /^<REPLY> see( also)? (.*?)\.?$/i) {
531 my $redirval = &getFactInfo($redir, "factoid_value");
532 if (defined $redirval) {
533 $redir{$redir}{$factoid} = 1;
535 &WARN("factstats(redir): '$factoid' has loose link => '$redir'.");
541 foreach $f (keys %redir) {
542 my @sublist = keys %{ $redir{$f} };
544 s/([\,\;]+)/\037$1\037/g;
547 push(@newlist, "$f => ". join(', ', @sublist));
551 my $prefix = "Redirections in factoids ";
552 return &formListReply(1, $prefix, @newlist);
554 } elsif ($type =~ /^request(ed)?$/i) {
555 my %hash = &dbGetCol("factoids", "factoid_key,requested_count", "requested_count IS NOT NULL", 1);
557 if (!scalar keys %hash) {
558 return 'sorry, no factoids have been questioned.';
564 foreach $count (sort {$b <=> $a} keys %hash) {
565 my @faqtoids = sort keys %{ $hash{$count} };
568 s/([\,\;]+)/\037$1\037/g;
570 $total += $count * scalar(@faqtoids);
572 push(@list, "$count - ". join(", ", @faqtoids));
574 unshift(@list, "\037$total - TOTAL\037");
576 my $prefix = "factoid statistics on $type ";
577 return &formListReply(0, $prefix, @list);
579 } elsif ($type =~ /^reqrate$/i) {
580 my %hash = &dbGetCol("factoids",
581 "factoid_key,(unix_timestamp() - created_time)/requested_count as rate",
582 "requested_by IS NOT NULL and created_time IS NOT NULL ORDER BY rate LIMIT 15", 1);
588 foreach $rate (sort { $b <=> $a } keys %hash) {
589 my $f = join(", ", sort keys %{ $hash{$rate} });
590 my $str = "$f - ".&Time2String($rate);
595 my $prefix = "Rank of top factoid rate (time/req): ";
596 return &formListReply(0, $prefix, @list);
598 } elsif ($type =~ /^requesters?$/i) {
599 my %hash = &dbGetCol("factoids", "factoid_key,requested_by", "requested_by IS NOT NULL");
602 foreach (keys %hash) {
603 my $thisnuh = $hash{$_};
605 $thisnuh =~ /^(\S+)!\S+@\S+$/;
609 if (!scalar keys %requester) {
610 return 'sorry, no factoids with requested_by field.';
615 foreach (keys %requester) {
616 $count{ $requester{$_} }{$_} = 1;
624 foreach $count (sort { $b <=> $a } keys %count) {
625 my $requester = join(", ", sort keys %{ $count{$count} });
626 $total += $count * scalar(keys %{ $count{$count} });
627 $users += scalar(keys %{ $count{$count} });
628 push(@list, "$count by $requester");
630 unshift(@list, "\037$total TOTAL REQUESTS; $users UNIQUE REQUESTERS\037");
631 # should not the above value be the same as collected by
632 # 'requested'? soemthing weird is going on!
634 my $prefix = "rank of top factoid requesters: ";
635 return &formListReply(0, $prefix, @list);
637 } elsif ($type =~ /^seefix$/i) {
638 my @list = &searchTable("factoids", "factoid_key",
639 "factoid_value", "^see ");
647 my $val = &getFactInfo($factoid, "factoid_value");
649 next unless ($val =~ /^see( also)? (.*?)\.?$/i);
652 my $redir = &getFactInfo($redirf, "factoid_value");
654 if ($redirf =~ /^\Q$factoid\W$/i) {
655 &delFactoid($factoid);
659 if (defined $redir) { # good.
660 &setFactInfo($factoid,"factoid_value","<REPLY> see $redir");
663 push(@newlist, $redirf);
668 &msg($who, "Fixed $fixed factoids.");
669 &msg($who, "Self looped factoids removed: ".
670 sort(keys %loop) ) if (scalar keys %loop);
672 my $prefix = "Loose link (dead) redirections in factoids ";
673 return &formListReply(1, $prefix, @newlist);
675 } elsif ($type =~ /^(2|too)long$/i) {
680 $query = "SELECT factoid_key FROM factoids WHERE length(factoid_key) >= $param{'maxKeySize'}";
681 my $sth = $dbh->prepare($query);
683 while (my @row = $sth->fetchrow_array) {
689 $query = "SELECT factoid_key,factoid_value FROM factoids WHERE length(factoid_value) >= $param{'maxDataSize'}";
690 $sth = $dbh->prepare($query);
692 while (my @row = $sth->fetchrow_array) {
693 push(@list,sprintf("\002%s\002 - %s", length($row[1]), $row[0]));
697 if (scalar @list == 0) {
698 return "good. no factoids exceed length.";
702 my $prefix = "factoid key||value exceeding length ";
703 return &formListReply(1, $prefix, @list);
705 } elsif ($type =~ /^unrequest(ed)?$/i) {
706 my @list = &dbRawReturn("SELECT factoid_key FROM factoids WHERE requested_count IS NULL");
709 s/([\,\;]+)/\037$1\037/g;
712 my $prefix = "Unrequested factoids ";
713 return &formListReply(0, $prefix, @list);
716 return "error: invalid type => '$type'.";
721 my @list = &searchTable("factoids","factoid_key", "created_by", "^$query!");
723 my $prefix = "factoid author list by '$query' ";
724 &performStrictReply( &formListReply(1, $prefix, @list) );