2 # Factoids.pl: Helpers for generating factoids statistics.
4 # Version: v0.1 (20000514)
5 # Splitted: SQLExtras.pl
8 if (&IsParam("useStrict")) { use strict; }
11 # Usage: &CmdFactInfo($faqtoid, $query);
13 my ($faqtoid, $query) = (lc $_[0], $_[1]);
22 my %factinfo = &dbGetColNiceHash("factoids", "*", "factoid_key=".&dbQuote($faqtoid));
24 # factoid does not exist.
25 if (scalar (keys %factinfo) <= 1) {
26 &performReply("there's no such factoid as \002$faqtoid\002");
30 # fix for problem observed by asuffield.
31 # why did it happen though?
32 if (!$factinfo{'factoid_value'}) {
33 &performReply("there's no such factoid as \002$faqtoid\002; deleted because we don't have factoid_value!");
34 foreach (keys %factinfo) {
35 &DEBUG("factinfo{$_} => '$factinfo{$_}'.");
37 ### &delFactoid($faqtoid);
42 if ($factinfo{'created_by'}) {
44 $factinfo{'created_by'} =~ s/\!/ </;
45 $factinfo{'created_by'} .= ">";
46 $string = "created by $factinfo{'created_by'}";
48 my $time = $factinfo{'created_time'};
50 if (time() - $time > 60*60*24*7) {
51 my $days = int( (time() - $time)/60/60/24 );
52 $string .= " at \037". scalar(localtime $time). "\037" .
55 $string .= " ".&Time2String(time() - $time)." ago";
63 if ($factinfo{'modified_by'}) {
64 $string = "last modified";
66 my $time = $factinfo{'modified_time'};
68 if (time() - $time > 60*60*24*7) {
69 $string .= " at \037". scalar(localtime $time). "\037";
71 $string .= " ".&Time2String(time() - $time)." ago ";
76 foreach (split ",", $factinfo{'modified_by'}) {
80 $string .= "by ".&IJoin(@x);
87 if ($factinfo{'requested_by'}) {
88 my $requested_count = $factinfo{'requested_count'};
90 if ($requested_count) {
91 $string = "it has been requested ";
92 if ($requested_count == 1) {
93 $string .= "\002once\002";
95 $string .= "\002". $requested_count. "\002 ".
96 &fixPlural("time", $requested_count);
100 $string .= ", " if ($string ne "");
102 my $requested_by = $factinfo{'requested_by'};
103 $requested_by =~ /\!/;
104 $string .= "last by $`";
106 my $requested_time = $factinfo{'requested_time'};
107 if ($requested_time) {
108 if (time() - $requested_time > 60*60*24*7) {
109 $string .= " at \037". scalar(localtime $requested_time). "\037";
111 $string .= ", ".&Time2String(time() - $requested_time)." ago";
115 push(@array,$string);
119 if ($factinfo{'locked_by'}) {
120 $factinfo{'locked_by'} =~ /\!/;
121 $string = "it has been locked by $`";
123 push(@array, $string);
126 # factoid was inserted not through the bot.
127 if (!scalar @array) {
128 &performReply("no extra info on \002$faqtoid\002");
132 &performStrictReply("$factinfo{'factoid_key'} -- ". join("; ", @array) .".");
139 if ($type =~ /^author$/i) {
140 my %hash = &dbGetCol("factoids", "factoid_key,created_by", "created_by IS NOT NULL");
143 foreach (keys %hash) {
144 my $thisnuh = $hash{$_};
146 $thisnuh =~ /^(\S+)!\S+@\S+$/;
150 if (!scalar keys %author) {
151 return 'sorry, no factoids with created_by field.';
156 foreach (keys %author) {
157 $count{ $author{$_} }{$_} = 1;
163 foreach $count (sort { $b <=> $a } keys %count) {
164 my $author = join(", ", sort keys %{ $count{$count} });
165 push(@list, "$count by $author");
168 my $prefix = "factoid statistics by author: ";
169 return &formListReply(0, $prefix, @list);
171 } elsif ($type =~ /^vandalism$/i) {
172 &status("factstats(vandalism): starting...");
173 my $start_time = &timeget();
174 my %data = &dbGetCol("factoids", "factoid_key,factoid_value", "factoid_value IS NOT NULL");
177 my $delta_time = &timedelta($start_time);
178 &status(sprintf("factstats(vandalismbroken): %.02f sec to retreive all factoids.", $delta_time)) if ($delta_time > 0);
179 $start_time = &timeget();
181 # parse the factoids.
182 foreach (keys %data) {
183 if (&validFactoid($_, $data{$_}) == 0) {
184 s/([\,\;]+)/\037$1\037/g; # highlight chars.
185 push(@list, $_); # push it.
189 $delta_time = &timedelta($start_time);
190 &status(sprintf("factstats(vandalism): %.02f sec to complete.", $delta_time)) if ($delta_time > 0);
192 # bail out on no results.
193 if (scalar @list == 0) {
194 return 'no vandalised factoids... wooohoo.';
198 my $prefix = "Vandalised factoid ";
199 return &formListReply(1, $prefix, @list);
201 } elsif ($type =~ /^total$/i) {
202 &status("factstats(total): starting...");
203 my $start_time = &timeget();
210 # total factoids requests.
211 $i = &sumKey("factoids", "requested_count");
212 push(@list, "total requests - $i");
214 # total factoids modified.
215 $str = &countKeys("factoids", "modified_by");
216 push(@list, "total modified - $str");
218 # total factoids modified.
219 $j = &countKeys("factoids", "requested_count");
220 $str = &countKeys("factoids", "factoid_key");
221 push(@list, "total non-requested - ".($str - $i));
223 # average request/factoid.
224 # i/j == total(requested_count)/count(requested_count)
225 $str = sprintf("%.01f", $i/$j);
226 push(@list, "average requested per factoid - $str");
228 # total prepared for deletion.
229 $str = scalar( &searchTable("factoids", "factoid_key", "factoid_value", " #DEL") );
230 push(@list, "total prepared for deletion - $str");
232 # total unique authors.
233 foreach ( &dbRawReturn("SELECT created_by FROM factoids WHERE created_by IS NOT NULL") ) {
238 push(@list, "total unique authors - ".(scalar keys %hash) );
241 # total unique requesters.
242 foreach ( &dbRawReturn("SELECT requested_by FROM factoids WHERE requested_by IS NOT NULL") ) {
247 push(@list, "total unique requesters - ".(scalar keys %hash) );
252 my $delta_time = &timedelta($start_time);
253 &status(sprintf("factstats(broken): %.02f sec to retreive all factoids.", $delta_time)) if ($delta_time > 0);
254 $start_time = &timeget();
256 # bail out on no results.
257 if (scalar @list == 0) {
258 return 'no broken factoids... wooohoo.';
262 my $prefix = "General factoid stiatistics ";
263 return &formListReply(1, $prefix, @list);
265 } elsif ($type =~ /^deadredir$/i) {
266 my @list = &searchTable("factoids", "factoid_key",
267 "factoid_value", "^<REPLY> see ");
273 my $val = &getFactInfo($factoid, "factoid_value");
274 if ($val =~ /^<REPLY> ?see( also)? (.*?)\.?$/i) {
276 my $redir = &getFactInfo($redirf, "factoid_value");
277 next if (defined $redir);
278 next if (length $val > 50);
280 $redir{$redirf}{$factoid} = 1;
285 foreach $f (keys %redir) {
286 my @sublist = keys %{ $redir{$f} };
288 s/([\,\;]+)/\037$1\037/g;
291 push(@newlist, join(', ', @sublist)." => $f");
295 my $prefix = "Loose link (dead) redirections in factoids ";
296 return &formListReply(1, $prefix, @newlist);
298 } elsif ($type =~ /^dup(licate|e)$/i) {
299 &status("factstats(dupe): starting...");
300 my $start_time = &timeget();
301 my %hash = &dbGetCol("factoids", "factoid_key,factoid_value", "factoid_value IS NOT NULL", 1);
306 foreach $v (keys %hash) {
307 my $count = scalar(keys %{ $hash{$v} });
308 next if ($count == 1);
311 foreach (keys %{ $hash{$v} }) {
312 if ($v =~ /^<REPLY> see /i) {
317 s/([\,\;]+)/\037$1\037/g;
319 &WARN("dupe: _ = NULL. should never happen!.");
325 next unless (scalar @sublist);
327 push(@list, join(", ", @sublist));
330 &status("factstats(dupe): (good) dupe refs: $refs.");
331 my $delta_time = &timedelta($start_time);
332 &status(sprintf("factstats(dupe): %.02f sec to complete", $delta_time)) if ($delta_time > 0);
334 # bail out on no results.
335 if (scalar @list == 0) {
336 return "no duplicate factoids... woohoo.";
340 my $prefix = "dupe factoid ";
341 return &formListReply(1, $prefix, @list);
343 } elsif ($type =~ /^nullfactoids$/i) {
344 my $query = "SELECT factoid_key,factoid_value FROM factoids WHERE factoid_value=''";
345 my $sth = $dbh->prepare($query);
346 &ERROR("factstats(null): => '$query'.") unless $sth->execute;
349 while (my @row = $sth->fetchrow_array) {
351 &DEBUG("row[1] != NULL for $row[0].");
355 &DEBUG("row[0] => '$row[0]'.");
356 push(@list, $row[0]);
361 my $prefix = "NULL factoids (not deleted yet) ";
362 return &formListReply(1, $prefix, @list);
364 } elsif ($type =~ /^(2|too)short$/i) {
365 # Custom select statement.
366 my $query = "SELECT factoid_key,factoid_value FROM factoids WHERE length(factoid_value) <= 40";
367 my $sth = $dbh->prepare($query);
368 &ERROR("factstats(lame): => '$query'.") unless $sth->execute;
371 while (my @row = $sth->fetchrow_array) {
372 my($key,$val) = ($row[0], $row[1]);
374 $match++ if ($val =~ /\s{3,}/);
375 next unless ($match);
377 my $v = &getFactoid($val);
379 &DEBUG("key $key => $val => $v");
382 $key =~ s/\,/\037\,\037/g;
388 my $prefix = "Lame factoids ";
389 return &formListReply(1, $prefix, @list);
391 } elsif ($type =~ /^listfix$/i) {
392 # Custom select statement.
393 my $query = "SELECT factoid_key,factoid_value FROM factoids";
394 my $sth = $dbh->prepare($query);
395 &ERROR("factstats(listfix): => '$query'.") unless $sth->execute;
398 while (my @row = $sth->fetchrow_array) {
399 my($key,$val) = ($row[0], $row[1]);
401 $match++ if ($val =~ /\S+,? or \S+,? or \S+,? or \S+,?/);
402 next unless ($match);
404 $key =~ s/\,/\037\,\037/g;
406 $val =~ s/,? or /, /g;
407 &DEBUG("fixed: => $val.");
408 &setFactInfo($key,"factoid_value", $val);
413 my $prefix = "Inefficient lists fixed ";
414 return &formListReply(1, $prefix, @list);
416 } elsif ($type =~ /^locked$/i) {
417 my %hash = &dbGetCol("factoids", "factoid_key,locked_by", "locked_by IS NOT NULL");
418 my @list = keys %hash;
421 s/([\,\;]+)/\037$1\037/g;
424 my $prefix = "factoid statistics on $type ";
425 return &formListReply(0, $prefix, @list);
427 } elsif ($type =~ /^new$/i) {
428 my %hash = &dbGetCol("factoids", "factoid_key,created_time", "created_time IS NOT NULL");
431 foreach (keys %hash) {
432 my $created_time = $hash{$_};
433 my $delta_time = time() - $created_time;
434 next if ($delta_time >= 60*60*24);
436 $age{$delta_time}{$_} = 1;
439 if (scalar keys %age == 0) {
440 return "sorry, no new factoids.";
444 foreach (sort {$a <=> $b} keys %age) {
445 push(@list, join(",", keys %{ $age{$_} }));
448 my $prefix = "new factoids in the last 24hours ";
449 return &formListReply(0, $prefix, @list);
451 } elsif ($type =~ /^part(ial)?dupe$/i) {
452 ### requires "custom" select statement... oh well...
453 my $start_time = &timeget();
455 # form length|key and key=length hash list.
456 &status("factstats(partdupe): forming length hash list.");
457 my $query = "SELECT factoid_key,factoid_value,length(factoid_value) AS length FROM factoids WHERE length(factoid_value) >= 192 ORDER BY length";
458 my $sth = $dbh->prepare($query);
459 &ERROR("factstats(partdupe): => '$query'.") unless $sth->execute;
463 while (my @row = $sth->fetchrow_array) {
464 $length{$row[2]}{$row[0]} = 1; # length(value)|key.
465 $key{$row[0]} = $row[1]; # key=value.
469 &status("factstats(partdupe): total keys => '". scalar(@key) ."'.");
470 &status("factstats(partdupe): now deciphering data gathered");
472 my @length = sort { $a <=> $b } keys %length;
475 foreach $key (@key) {
476 shift @length if (length $key{$key} == $length[0]);
478 my $val = quotemeta $key{$key};
481 foreach $length (@length) {
482 foreach (keys %{ $length{$length} }) {
483 if ($key{$_} =~ /^$val/i) {
484 s/([\,\;]+)/\037$1\037/g;
485 s/( and|and )/\037$1\037/g;
486 push(@sublist,$key." and ".$_);
490 push(@list, join(" ,",@sublist)) if (scalar @sublist);
493 my $delta_time = sprintf("%.02fs", &timedelta($start_time) );
494 &status("factstats(partdupe): $delta_time sec to complete.") if ($delta_time > 0);
496 # bail out on no results.
497 if (scalar @list == 0) {
498 return "no initial partial duplicate factoids... woohoo.";
502 my $prefix = "initial partial dupe factoid ";
503 return &formListReply(1, $prefix, @list);
505 } elsif ($type =~ /^profanity$/i) {
506 my %data = &dbGetCol("factoids", "factoid_key,factoid_value", "factoid_value IS NOT NULL");
509 foreach (keys %data) {
510 push(@list, $_) if (&hasProfanity($_." ".$data{$_}));
514 my $prefix = "Profanity in factoids ";
515 return &formListReply(1, $prefix, @list);
517 } elsif ($type =~ /^redir(ection)?$/i) {
518 my @list = &searchTable("factoids", "factoid_key",
519 "factoid_value", "^<REPLY> see ");
525 my $val = &getFactInfo($factoid, "factoid_value");
526 if ($val =~ /^<REPLY> see( also)? (.*?)\.?$/i) {
528 my $redirval = &getFactInfo($redir, "factoid_value");
529 if (defined $redirval) {
530 $redir{$redir}{$factoid} = 1;
532 &WARN("factstats(redir): '$factoid' has loose link => '$redir'.");
538 foreach $f (keys %redir) {
539 my @sublist = keys %{ $redir{$f} };
541 s/([\,\;]+)/\037$1\037/g;
544 push(@newlist, "$f => ". join(', ', @sublist));
548 my $prefix = "Redirections in factoids ";
549 return &formListReply(1, $prefix, @newlist);
551 } elsif ($type =~ /^request(ed)?$/i) {
552 my %hash = &dbGetCol("factoids", "factoid_key,requested_count", "requested_count IS NOT NULL", 1);
554 if (!scalar keys %hash) {
555 return 'sorry, no factoids have been questioned.';
561 foreach $count (sort {$b <=> $a} keys %hash) {
562 my @faqtoids = sort keys %{ $hash{$count} };
565 s/([\,\;]+)/\037$1\037/g;
567 $total += $count * scalar(@faqtoids);
569 push(@list, "$count - ". join(", ", @faqtoids));
571 unshift(@list, "\037$total - TOTAL\037");
573 my $prefix = "factoid statistics on $type ";
574 return &formListReply(0, $prefix, @list);
576 } elsif ($type =~ /^reqrate$/i) {
577 my %hash = &dbGetCol("factoids",
578 "factoid_key,(unix_timestamp() - created_time)/requested_count as rate",
579 "requested_by IS NOT NULL and created_time IS NOT NULL ORDER BY rate LIMIT 15", 1);
585 foreach $rate (sort { $b <=> $a } keys %hash) {
586 my $f = join(", ", sort keys %{ $hash{$rate} });
587 my $str = "$f - ".&Time2String($rate);
592 my $prefix = "Rank of top factoid rate (time/req): ";
593 return &formListReply(0, $prefix, @list);
595 } elsif ($type =~ /^requesters?$/i) {
596 my %hash = &dbGetCol("factoids", "factoid_key,requested_by", "requested_by IS NOT NULL");
599 foreach (keys %hash) {
600 my $thisnuh = $hash{$_};
602 $thisnuh =~ /^(\S+)!\S+@\S+$/;
606 if (!scalar keys %requester) {
607 return 'sorry, no factoids with requested_by field.';
612 foreach (keys %requester) {
613 $count{ $requester{$_} }{$_} = 1;
621 foreach $count (sort { $b <=> $a } keys %count) {
622 my $requester = join(", ", sort keys %{ $count{$count} });
623 $total += $count * scalar(keys %{ $count{$count} });
624 $users += scalar(keys %{ $count{$count} });
625 push(@list, "$count by $requester");
627 unshift(@list, "\037$total TOTAL REQUESTS; $users UNIQUE REQUESTERS\037");
628 # should not the above value be the same as collected by
629 # 'requested'? soemthing weird is going on!
631 my $prefix = "rank of top factoid requesters: ";
632 return &formListReply(0, $prefix, @list);
634 } elsif ($type =~ /^seefix$/i) {
635 my @list = &searchTable("factoids", "factoid_key",
636 "factoid_value", "^see ");
644 my $val = &getFactInfo($factoid, "factoid_value");
646 next unless ($val =~ /^see( also)? (.*?)\.?$/i);
649 my $redir = &getFactInfo($redirf, "factoid_value");
651 if ($redirf =~ /^\Q$factoid\W$/i) {
652 &delFactoid($factoid);
656 if (defined $redir) { # good.
657 &setFactInfo($factoid,"factoid_value","<REPLY> see $redir");
660 push(@newlist, $redirf);
665 &msg($who, "Fixed $fixed factoids.");
666 &msg($who, "Self looped factoids removed: ".
667 sort(keys %loop) ) if (scalar keys %loop);
669 my $prefix = "Loose link (dead) redirections in factoids ";
670 return &formListReply(1, $prefix, @newlist);
672 } elsif ($type =~ /^(2|too)long$/i) {
676 $query = "SELECT factoid_key FROM factoids WHERE length(factoid_key) >= $param{'maxKeySize'}";
677 my $sth = $dbh->prepare($query);
679 while (my @row = $sth->fetchrow_array) {
684 my $query = "SELECT factoid_key,factoid_value FROM factoids WHERE length(factoid_value) >= $param{'maxDataSize'}";
685 $sth = $dbh->prepare($query);
687 while (my @row = $sth->fetchrow_array) {
688 push(@list,sprintf("\002%s\002 - %s", length($row[1]), $row[0]));
691 if (scalar @list == 0) {
692 return "good. no factoids exceed length.";
696 my $prefix = "factoid key||value exceeding length ";
697 return &formListReply(1, $prefix, @list);
699 } elsif ($type =~ /^unrequest(ed)?$/i) {
700 my @list = &dbRawReturn("SELECT factoid_key FROM factoids WHERE requested_count IS NULL");
703 s/([\,\;]+)/\037$1\037/g;
706 my $prefix = "Unrequested factoids ";
707 return &formListReply(0, $prefix, @list);
710 return "error: invalid type => '$type'.";
715 my @list = &searchTable("factoids","factoid_key", "created_by", "^$query!");
717 my $prefix = "factoid author list by '$query' ";
718 &performStrictReply( &formListReply(1, $prefix, @list) );