2 # Factoids.pl: Helpers for generating factoids statistics.
4 # Version: v0.1 (20000514)
5 # Splitted: SQLExtras.pl
10 use vars qw($dbh $who);
14 # Usage: &CmdFactInfo($faqtoid, $query);
16 my ($faqtoid, $query) = (lc $_[0], $_[1]);
25 my %factinfo = &sqlSelectRowHash('factoids', '*',
26 { factoid_key => $faqtoid }
29 # factoid does not exist.
30 if (scalar (keys %factinfo) <= 1) {
31 &performReply("there's no such factoid as \002$faqtoid\002");
35 # fix for problem observed by asuffield.
36 # why did it happen though?
37 if (!$factinfo{'factoid_value'}) {
38 &performReply("there's no such factoid as \002$faqtoid\002; deleted because we don't have factoid_value!");
39 foreach (keys %factinfo) {
40 &DEBUG("factinfo{$_} => '$factinfo{$_}'.");
42 ### &delFactoid($faqtoid);
47 if ($factinfo{'created_by'}) {
49 $factinfo{'created_by'} =~ s/\!/ </;
50 $factinfo{'created_by'} .= '>';
51 $string = "created by $factinfo{'created_by'}";
53 my $time = $factinfo{'created_time'};
55 if (time() - $time > 60*60*24*7) {
56 my $days = int( (time() - $time)/60/60/24 );
57 $string .= " at \037". scalar(gmtime $time). "\037" .
60 $string .= ' '.&Time2String(time() - $time).' ago';
67 # modified: (TimRiker asks: why do you keep turning this off?)
68 if ($factinfo{'modified_by'}) {
69 $string = 'last modified';
71 my $time = $factinfo{'modified_time'};
73 if (time() - $time > 60*60*24*7) {
74 $string .= " at \037". scalar(gmtime $time). "\037";
76 $string .= ' '.&Time2String(time() - $time).' ago ';
80 $string .= ' by '.(split ',', $factinfo{'modified_by'})[0];
86 if ($factinfo{'requested_by'}) {
87 my $requested_count = $factinfo{'requested_count'};
89 if ($requested_count) {
90 $string = 'it has been requested ';
91 if ($requested_count == 1) {
92 $string .= "\002once\002";
94 $string .= "\002". $requested_count. "\002 ".
95 &fixPlural('time', $requested_count);
98 my $requested_by = $factinfo{'requested_by'};
99 $requested_by =~ /\!/;
100 $string .= ", last by $`";
102 my $requested_time = $factinfo{'requested_time'};
103 if ($requested_time) {
104 if (time() - $requested_time > 60*60*24*7) {
105 $string .= " at \037". scalar(localtime $requested_time). "\037";
107 $string .= ', '.&Time2String(time() - $requested_time).' ago';
111 $string = 'has not been requested yet';
114 push(@array, $string);
118 if ($factinfo{'locked_by'}) {
119 $factinfo{'locked_by'} =~ /\!/;
120 $string = "it has been locked by $`";
122 push(@array, $string);
125 # factoid was inserted not through the bot.
126 if (!scalar @array) {
127 &performReply("no extra info on \002$faqtoid\002");
131 &performStrictReply("$factinfo{'factoid_key'} -- ". join('; ', @array) .'.');
138 if ($type =~ /^author$/i) {
139 my %hash = &sqlSelectColHash('factoids',
140 'factoid_key,created_by', undef,
141 'WHERE created_by IS NOT NULL'
145 foreach my $factoid (keys %hash) {
146 my $thisnuh = $hash{$factoid};
148 $thisnuh =~ /^(\S+)!\S+@\S+$/;
152 if (!scalar keys %author) {
153 return 'sorry, no factoids with created_by field.';
158 foreach (keys %author) {
159 $count{ $author{$_} }{$_} = 1;
165 foreach $count (sort { $b <=> $a } keys %count) {
166 my $author = join(', ', sort keys %{ $count{$count} });
167 push(@list, "$count by $author");
170 my $prefix = 'factoid statistics by author: ';
171 return &formListReply(0, $prefix, @list);
173 } elsif ($type =~ /^vandalism$/i) {
174 &status('factstats(vandalism): starting...');
175 my $start_time = &timeget();
176 my %data = &sqlSelectColHash('factoids',
177 'factoid_key,factoid_value', undef,
178 'WHERE factoid_value IS NOT NULL'
182 my $delta_time = &timedelta($start_time);
183 &status(sprintf('factstats(vandalism): %.02f sec to retreive all factoids.', $delta_time)) if ($delta_time > 0);
184 $start_time = &timeget();
186 # parse the factoids.
187 foreach (keys %data) {
188 if (&validFactoid($_, $data{$_}) == 0) {
189 s/([\,\;]+)/\037$1\037/g; # highlight chars.
190 push(@list, $_); # push it.
194 $delta_time = &timedelta($start_time);
195 &status(sprintf('factstats(vandalism): %.02f sec to complete.', $delta_time)) if ($delta_time > 0);
197 # bail out on no results.
198 if (scalar @list == 0) {
199 return 'no vandalised factoids... wooohoo.';
203 my $prefix = 'Vandalised factoid ';
204 return &formListReply(1, $prefix, @list);
206 } elsif ($type =~ /^total$/i) {
207 &status('factstats(total): starting...');
208 my $start_time = &timeget();
215 # total factoids requests.
216 $i = &sumKey('factoids', 'requested_count');
217 push(@list, "total requests - $i");
219 # total factoids modified.
220 $str = &countKeys('factoids', 'modified_by');
221 push(@list, "total modified - $str");
223 # total factoids modified.
224 $j = &countKeys('factoids', 'requested_count');
225 $str = &countKeys('factoids', 'factoid_key');
226 push(@list, 'total non-requested - '.($str - $i));
228 # average request/factoid.
229 # i/j == total(requested_count)/count(requested_count)
230 $str = sprintf('%.01f', $i/$j);
231 push(@list, "average requested per factoid - $str");
233 # total prepared for deletion.
234 $str = scalar( &searchTable('factoids', 'factoid_key', 'factoid_value', ' #DEL') );
235 push(@list, "total prepared for deletion - $str");
237 # total unique authors.
238 # TODO: convert to sqlSelectColHash ? (or ColArray?)
239 foreach ( &sqlRawReturn('SELECT created_by FROM factoids WHERE created_by IS NOT NULL') ) {
244 push(@list, 'total unique authors - '.(scalar keys %hash) );
247 # total unique requesters.
248 foreach ( &sqlRawReturn('SELECT requested_by FROM factoids WHERE requested_by IS NOT NULL') ) {
253 push(@list, 'total unique requesters - '.(scalar keys %hash) );
258 my $delta_time = &timedelta($start_time);
259 &status(sprintf('factstats(broken): %.02f sec to retreive all factoids.', $delta_time)) if ($delta_time > 0);
260 $start_time = &timeget();
262 # bail out on no results.
263 if (scalar @list == 0) {
264 return 'no broken factoids... wooohoo.';
268 my $prefix = 'General factoid statistics ';
269 return &formListReply(1, $prefix, @list);
271 } elsif ($type =~ /^deadredir$/i) {
272 my @list = &searchTable('factoids', 'factoid_key',
273 'factoid_value', '^<REPLY> see ');
279 my $val = &getFactInfo($factoid, 'factoid_value');
280 if ($val =~ /^<REPLY> ?see( also)? (.*?)\.?$/i) {
282 my $redir = &getFactInfo($redirf, 'factoid_value');
283 next if (defined $redir);
284 next if (length $val > 50);
286 $redir{$redirf}{$factoid} = 1;
291 foreach $f (keys %redir) {
292 my @sublist = keys %{ $redir{$f} };
294 s/([\,\;]+)/\037$1\037/g;
297 push(@newlist, join(', ', @sublist)." => $f");
301 my $prefix = 'Loose link (dead) redirections in factoids ';
302 return &formListReply(1, $prefix, @newlist);
304 } elsif ($type =~ /^dup(licate|e)$/i) {
305 &status('factstats(dupe): starting...');
306 my $start_time = &timeget();
307 my %hash = &sqlSelectColHash('factoids',
308 'factoid_key,factoid_value', undef,
309 'WHERE factoid_value IS NOT NULL', 1
315 foreach $v (keys %hash) {
316 my $count = scalar(keys %{ $hash{$v} });
317 next if ($count == 1);
320 foreach (keys %{ $hash{$v} }) {
321 if ($v =~ /^<REPLY> see /i) {
326 s/([\,\;]+)/\037$1\037/g;
328 &WARN('dupe: _ = NULL. should never happen!.');
334 next unless (scalar @sublist);
336 push(@list, join(', ', @sublist));
339 &status("factstats(dupe): (good) dupe refs: $refs.");
340 my $delta_time = &timedelta($start_time);
341 &status(sprintf('factstats(dupe): %.02f sec to complete', $delta_time)) if ($delta_time > 0);
343 # bail out on no results.
344 if (scalar @list == 0) {
345 return 'no duplicate factoids... woohoo.';
349 my $prefix = 'dupe factoid ';
350 return &formListReply(1, $prefix, @list);
352 } elsif ($type =~ /^nullfactoids$/i) {
353 my $query = "SELECT factoid_key,factoid_value FROM factoids WHERE factoid_value=''";
354 my $sth = $dbh->prepare($query);
355 &ERROR("factstats(null): => '$query'.") unless $sth->execute;
358 while (my @row = $sth->fetchrow_array) {
360 &DEBUG("row[1] != NULL for $row[0].");
364 &DEBUG("row[0] => '$row[0]'.");
365 push(@list, $row[0]);
370 my $prefix = 'NULL factoids (not deleted yet) ';
371 return &formListReply(1, $prefix, @list);
373 } elsif ($type =~ /^(2|too)short$/i) {
374 # Custom select statement.
375 my $query = 'SELECT factoid_key,factoid_value FROM factoids WHERE length(factoid_value) <= 40';
376 my $sth = $dbh->prepare($query);
377 &ERROR("factstats(lame): => '$query'.") unless $sth->execute;
380 while (my @row = $sth->fetchrow_array) {
381 my($key,$val) = ($row[0], $row[1]);
383 $match++ if ($val =~ /\s{3,}/);
384 next unless ($match);
386 my $v = &getFactoid($val);
388 &DEBUG("key $key => $val => $v");
391 $key =~ s/\,/\037\,\037/g;
397 my $prefix = 'Lame factoids ';
398 return &formListReply(1, $prefix, @list);
400 } elsif ($type =~ /^listfix$/i) {
401 # Custom select statement.
402 my $query = 'SELECT factoid_key,factoid_value FROM factoids';
403 my $sth = $dbh->prepare($query);
404 &ERROR("factstats(listfix): => '$query'.") unless $sth->execute;
407 while (my @row = $sth->fetchrow_array) {
408 my($key,$val) = ($row[0], $row[1]);
410 $match++ if ($val =~ /\S+,? or \S+,? or \S+,? or \S+,?/);
411 next unless ($match);
413 $key =~ s/\,/\037\,\037/g;
415 $val =~ s/,? or /, /g;
416 &DEBUG("fixed: => $val.");
417 &setFactInfo($key,'factoid_value', $val);
422 my $prefix = 'Inefficient lists fixed ';
423 return &formListReply(1, $prefix, @list);
425 } elsif ($type =~ /^locked$/i) {
426 my %hash = &sqlSelectColHash('factoids',
427 'factoid_key,locked_by', undef,
428 'WHERE locked_by IS NOT NULL'
430 my @list = keys %hash;
433 s/([\,\;]+)/\037$1\037/g;
436 my $prefix = "factoid statistics on $type ";
437 return &formListReply(0, $prefix, @list);
439 } elsif ($type =~ /^new$/i) {
440 my %hash = &sqlSelectColHash('factoids',
441 'factoid_key,created_time', undef,
442 'WHERE created_time IS NOT NULL'
446 foreach (keys %hash) {
447 my $created_time = $hash{$_};
448 my $delta_time = time() - $created_time;
449 next if ($delta_time >= 60*60*24);
451 $age{$delta_time}{$_} = 1;
454 if (scalar keys %age == 0) {
455 return 'sorry, no new factoids.';
459 foreach (sort {$a <=> $b} keys %age) {
460 push(@list, join(',', keys %{ $age{$_} }));
463 my $prefix = 'new factoids in the last 24hours ';
464 return &formListReply(0, $prefix, @list);
466 } elsif ($type =~ /^part(ial)?dupe$/i) {
467 ### requires 'custom' select statement... oh well...
468 my $start_time = &timeget();
470 # form length|key and key=length hash list.
471 &status('factstats(partdupe): forming length hash list.');
472 my $query = 'SELECT factoid_key,factoid_value,length(factoid_value) AS length FROM factoids WHERE length(factoid_value) >= 192 ORDER BY length';
473 my $sth = $dbh->prepare($query);
474 &ERROR("factstats(partdupe): => '$query'.") unless $sth->execute;
478 while (my @row = $sth->fetchrow_array) {
479 $length{$row[2]}{$row[0]} = 1; # length(value)|key.
480 $key{$row[0]} = $row[1]; # key=value.
484 &status("factstats(partdupe): total keys => '". scalar(@key) ."'.");
485 &status('factstats(partdupe): now deciphering data gathered');
487 my @length = sort { $a <=> $b } keys %length;
490 foreach $key (@key) {
491 shift @length if (length $key{$key} == $length[0]);
493 my $val = quotemeta $key{$key};
496 foreach $length (@length) {
497 foreach (keys %{ $length{$length} }) {
498 if ($key{$_} =~ /^$val/i) {
499 s/([\,\;]+)/\037$1\037/g;
500 s/( and|and )/\037$1\037/g;
501 push(@sublist,$key.' and '.$_);
505 push(@list, join(' ,',@sublist)) if (scalar @sublist);
508 my $delta_time = sprintf('%.02fs', &timedelta($start_time) );
509 &status("factstats(partdupe): $delta_time sec to complete.") if ($delta_time > 0);
511 # bail out on no results.
512 if (scalar @list == 0) {
513 return 'no initial partial duplicate factoids... woohoo.';
517 my $prefix = 'initial partial dupe factoid ';
518 return &formListReply(1, $prefix, @list);
520 } elsif ($type =~ /^profanity$/i) {
521 my %data = &sqlSelectColHash('factoids',
522 'factoid_key,factoid_value', undef,
523 'WHERE factoid_value IS NOT NULL'
527 foreach (keys %data) {
528 push(@list, $_) if (&hasProfanity($_.' '.$data{$_}));
532 my $prefix = 'Profanity in factoids ';
533 return &formListReply(1, $prefix, @list);
535 } elsif ($type =~ /^redir(ection)?$/i) {
536 my @list = &searchTable('factoids', 'factoid_key',
537 'factoid_value', '^<REPLY> see ');
544 my $val = &getFactInfo($factoid, 'factoid_value');
545 if ($val =~ /^<REPLY> see( also)? (.*?)\.?$/i) {
547 my $redirval = &getFactInfo($redir, 'factoid_value');
548 if (defined $redirval) {
549 $redir{$redir}{$factoid} = 1;
551 &DEBUG("factstats(redir): '$factoid' has loose link => '$redir'.");
558 foreach $f (keys %redir) {
559 my @sublist = keys %{ $redir{$f} };
561 s/([\,\;]+)/\037$1\037/g;
564 push(@newlist, "$f => ". join(', ', @sublist));
568 my $prefix = "Redirections in factoids, $dangling dangling ";
569 return &formListReply(1, $prefix, @newlist);
571 } elsif ($type =~ /^request(ed)?$/i) {
572 my %hash = &sqlSelectColHash('factoids',
573 'factoid_key,requested_count', undef,
574 'WHERE requested_count IS NOT NULL', 1
577 if (!scalar keys %hash) {
578 return 'sorry, no factoids have been questioned.';
584 foreach $count (sort {$b <=> $a} keys %hash) {
585 my @faqtoids = sort keys %{ $hash{$count} };
588 s/([\,\;]+)/\037$1\037/g;
590 $total += $count * scalar(@faqtoids);
592 push(@list, "$count - ". join(', ', @faqtoids));
594 unshift(@list, "\037$total - TOTAL\037");
596 my $prefix = "factoid statistics on $type ";
597 return &formListReply(0, $prefix, @list);
599 } elsif ($type =~ /^reqrate$/i) {
600 my %hash = &sqlSelectColHash('factoids',
601 "factoid_key,(unix_timestamp() - created_time)/requested_count as rate", undef,
602 'WHERE requested_by IS NOT NULL and created_time IS NOT NULL ORDER BY rate LIMIT 15', 1
609 foreach $rate (sort { $b <=> $a } keys %hash) {
610 my $f = join(', ', sort keys %{ $hash{$rate} });
611 my $str = "$f - ".&Time2String($rate);
616 my $prefix = "Rank of top factoid rate (time/req): ";
617 return &formListReply(0, $prefix, @list);
619 } elsif ($type =~ /^requesters?$/i) {
620 my %hash = &sqlSelectColHash('factoids',
621 'factoid_key,requested_by', undef,
622 'WHERE requested_by IS NOT NULL'
626 foreach (keys %hash) {
627 my $thisnuh = $hash{$_};
629 $thisnuh =~ /^(\S+)!\S+@\S+$/;
633 if (!scalar keys %requester) {
634 return 'sorry, no factoids with requested_by field.';
639 foreach (keys %requester) {
640 $count{ $requester{$_} }{$_} = 1;
648 foreach $count (sort { $b <=> $a } keys %count) {
649 my $requester = join(', ', sort keys %{ $count{$count} });
650 $total += $count * scalar(keys %{ $count{$count} });
651 $users += scalar(keys %{ $count{$count} });
652 push(@list, "$count by $requester");
654 unshift(@list, "\037$total TOTAL REQUESTS; $users UNIQUE REQUESTERS\037");
655 # should not the above value be the same as collected by
656 # 'requested'? soemthing weird is going on!
658 my $prefix = 'rank of top factoid requesters: ';
659 return &formListReply(0, $prefix, @list);
661 } elsif ($type =~ /^seefix$/i) {
662 my @list = &searchTable('factoids', 'factoid_key',
663 'factoid_value', '^see ');
671 my $val = &getFactInfo($factoid, 'factoid_value');
673 next unless ($val =~ /^see( also)? (.*?)\.?$/i);
676 my $redir = &getFactInfo($redirf, 'factoid_value');
678 if ($redirf =~ /^\Q$factoid\W$/i) {
679 &delFactoid($factoid);
683 if (defined $redir) { # good.
684 &setFactInfo($factoid,'factoid_value',"<REPLY> see $redir");
687 push(@newlist, $redirf);
692 &msg($who, "Fixed $fixed factoids.");
693 &msg($who, 'Self looped factoids removed: '. keys %loop ) if (scalar keys %loop);
695 my $prefix = "Loose link (dead) redirections in factoids ";
696 return &formListReply(1, $prefix, @newlist);
698 } elsif ($type =~ /^(2|too)long$/i) {
703 $query = "SELECT factoid_key FROM factoids WHERE length(factoid_key) >= $param{'maxKeySize'}";
704 my $sth = $dbh->prepare($query);
706 while (my @row = $sth->fetchrow_array) {
712 $query = "SELECT factoid_key,factoid_value FROM factoids WHERE length(factoid_value) >= $param{'maxDataSize'}";
713 $sth = $dbh->prepare($query);
715 while (my @row = $sth->fetchrow_array) {
716 push(@list,sprintf("\002%s\002 - %s", length($row[1]), $row[0]));
720 if (scalar @list == 0) {
721 return 'good. no factoids exceed length.';
725 my $prefix = 'factoid key||value exceeding length ';
726 return &formListReply(1, $prefix, @list);
728 } elsif ($type =~ /^unrequest(ed)?$/i) {
729 # TODO: use sqlSelect()
730 my ($count) = &sqlRawReturn("SELECT COUNT(*) FROM factoids WHERE requested_count = '0'");
732 return "Unrequested factoids: $count";
735 return "error: invalid type => '$type'.";
740 my $maxshow = &::getChanConfDefault('maxListReplyCount', 15, $chan);
741 my @list = &searchTable('factoids','factoid_key', 'created_by', "^$query!");
742 @list=grep(!/\#DEL\#$/,@list) if (scalar(@list) > $maxshow);
744 my $prefix = "factoid author list by '$query' ";
745 &performStrictReply( &formListReply(1, $prefix, @list) );