2 # Factoids.pl: Helpers for generating factoids statistics.
4 # Version: v0.1 (20000514)
5 # Splitted: SQLExtras.pl
10 use vars qw($dbh $who);
14 # Usage: &CmdFactInfo($faqtoid, $query);
16 my ( $faqtoid, $query ) = ( lc $_[0], $_[1] );
20 if ( $faqtoid eq '' ) {
26 # ops can see deleted faq info
27 $faqtoid=~s/#del#/#DEL#/;
30 &sqlSelectRowHash( 'factoids', '*', { factoid_key => $faqtoid } );
32 # factoid does not exist.
33 if ( scalar( keys %factinfo ) <= 1 ) {
34 &performReply("there's no such factoid as \002$faqtoid\002");
38 # fix for problem observed by asuffield.
39 # why did it happen though?
40 if ( !$factinfo{'factoid_value'} ) {
42 "there's no such factoid as \002$faqtoid\002; deleted because we don't have factoid_value!"
44 foreach ( keys %factinfo ) {
45 &DEBUG("factinfo{$_} => '$factinfo{$_}'.");
47 ### &delFactoid($faqtoid);
52 if ( $factinfo{'created_by'} ) {
54 $factinfo{'created_by'} =~ s/\!/ </;
55 $factinfo{'created_by'} .= '>';
56 $string = "created by $factinfo{'created_by'}";
58 my $time = $factinfo{'created_time'};
60 if ( time() - $time > 60 * 60 * 24 * 7 ) {
61 my $days = int( ( time() - $time ) / 60 / 60 / 24 );
63 . scalar( gmtime $time ) . "\037"
67 $string .= ' ' . &Time2String( time() - $time ) . ' ago';
71 push( @array, $string );
74 # modified: (TimRiker asks: why do you keep turning this off?)
75 if ( $factinfo{'modified_by'} ) {
76 $string = 'last modified';
78 my $time = $factinfo{'modified_time'};
80 if ( time() - $time > 60 * 60 * 24 * 7 ) {
81 $string .= " at \037" . scalar( gmtime $time ) . "\037";
84 $string .= ' ' . &Time2String( time() - $time ) . ' ago ';
88 $string .= ' by ' . ( split ',', $factinfo{'modified_by'} )[0];
90 push( @array, $string );
94 if ( $factinfo{'requested_by'} ) {
95 my $requested_count = $factinfo{'requested_count'};
97 if ($requested_count) {
98 $string = 'it has been requested ';
99 if ( $requested_count == 1 ) {
100 $string .= "\002once\002";
104 . $requested_count . "\002 "
105 . &fixPlural( 'time', $requested_count );
108 my $requested_by = $factinfo{'requested_by'};
109 $requested_by =~ /\!/;
110 $string .= ", last by $`";
112 my $requested_time = $factinfo{'requested_time'};
113 if ($requested_time) {
114 if ( time() - $requested_time > 60 * 60 * 24 * 7 ) {
116 " at \037" . scalar( localtime $requested_time ) . "\037";
120 ', ' . &Time2String( time() - $requested_time ) . ' ago';
125 $string = 'has not been requested yet';
128 push( @array, $string );
132 if ( $factinfo{'locked_by'} ) {
133 $factinfo{'locked_by'} =~ /\!/;
134 $string = "it has been locked by $`";
136 push( @array, $string );
139 # factoid was inserted not through the bot.
140 if ( !scalar @array ) {
141 &performReply("no extra info on \002$faqtoid\002");
146 "$factinfo{'factoid_key'} -- " . join( '; ', @array ) . '.' );
153 if ( $type =~ /^author$/i ) {
154 my %hash = &sqlSelectColHash(
155 'factoids', 'factoid_key,created_by',
156 undef, 'WHERE created_by IS NOT NULL'
160 foreach my $factoid ( keys %hash ) {
161 my $thisnuh = $hash{$factoid};
163 $thisnuh =~ /^(\S+)!\S+@\S+$/;
167 if ( !scalar keys %author ) {
168 return 'sorry, no factoids with created_by field.';
173 foreach ( keys %author ) {
174 $count{ $author{$_} }{$_} = 1;
180 foreach $count ( sort { $b <=> $a } keys %count ) {
181 my $author = join( ', ', sort keys %{ $count{$count} } );
182 push( @list, "$count by $author" );
185 my $prefix = 'factoid statistics by author: ';
186 return &formListReply( 0, $prefix, @list );
189 elsif ( $type =~ /^vandalism$/i ) {
190 &status('factstats(vandalism): starting...');
191 my $start_time = &timeget();
192 my %data = &sqlSelectColHash(
193 'factoids', 'factoid_key,factoid_value',
194 undef, 'WHERE factoid_value IS NOT NULL'
198 my $delta_time = &timedelta($start_time);
201 'factstats(vandalism): %.02f sec to retreive all factoids.',
203 ) if ( $delta_time > 0 );
204 $start_time = &timeget();
206 # parse the factoids.
207 foreach ( keys %data ) {
208 if ( &validFactoid( $_, $data{$_} ) == 0 ) {
209 s/([\,\;]+)/\037$1\037/g; # highlight chars.
210 push( @list, $_ ); # push it.
214 $delta_time = &timedelta($start_time);
216 sprintf( 'factstats(vandalism): %.02f sec to complete.',
218 ) if ( $delta_time > 0 );
220 # bail out on no results.
221 if ( scalar @list == 0 ) {
222 return 'no vandalised factoids... wooohoo.';
226 my $prefix = 'Vandalised factoid ';
227 return &formListReply( 1, $prefix, @list );
230 elsif ( $type =~ /^total$/i ) {
231 &status('factstats(total): starting...');
232 my $start_time = &timeget();
239 # total factoids requests.
240 $i = &sumKey( 'factoids', 'requested_count' );
241 push( @list, "total requests - $i" );
243 # total factoids modified.
244 $str = &countKeys( 'factoids', 'modified_by' );
245 push( @list, "total modified - $str" );
247 # total factoids modified.
248 $j = &countKeys( 'factoids', 'requested_count' );
249 $str = &countKeys( 'factoids', 'factoid_key' );
250 push( @list, 'total non-requested - ' . ( $str - $i ) );
252 # average request/factoid.
253 # i/j == total(requested_count)/count(requested_count)
254 $str = sprintf( '%.01f', $i / $j );
255 push( @list, "average requested per factoid - $str" );
257 # total prepared for deletion.
260 &searchTable( 'factoids', 'factoid_key', 'factoid_value', ' #DEL' )
262 push( @list, "total prepared for deletion - $str" );
264 # total unique authors.
265 # TODO: convert to sqlSelectColHash ? (or ColArray?)
268 'SELECT created_by FROM factoids WHERE created_by IS NOT NULL')
275 push( @list, 'total unique authors - ' . ( scalar keys %hash ) );
278 # total unique requesters.
281 'SELECT requested_by FROM factoids WHERE requested_by IS NOT NULL'
289 push( @list, 'total unique requesters - ' . ( scalar keys %hash ) );
294 my $delta_time = &timedelta($start_time);
296 sprintf( 'factstats(broken): %.02f sec to retreive all factoids.',
298 ) if ( $delta_time > 0 );
299 $start_time = &timeget();
301 # bail out on no results.
302 if ( scalar @list == 0 ) {
303 return 'no broken factoids... wooohoo.';
307 my $prefix = 'General factoid statistics ';
308 return &formListReply( 1, $prefix, @list );
311 elsif ( $type =~ /^deadredir$/i ) {
313 &searchTable( 'factoids', 'factoid_key', 'factoid_value',
320 my $val = &getFactInfo( $factoid, 'factoid_value' );
321 if ( $val =~ /^<REPLY> ?see( also)? (.*?)\.?$/i ) {
323 my $redir = &getFactInfo( $redirf, 'factoid_value' );
324 next if ( defined $redir );
325 next if ( length $val > 50 );
327 $redir{$redirf}{$factoid} = 1;
332 foreach $f ( keys %redir ) {
333 my @sublist = keys %{ $redir{$f} };
335 s/([\,\;]+)/\037$1\037/g;
338 push( @newlist, join( ', ', @sublist ) . " => $f" );
342 my $prefix = 'Loose link (dead) redirections in factoids ';
343 return &formListReply( 1, $prefix, @newlist );
346 elsif ( $type =~ /^dup(licate|e)$/i ) {
347 &status('factstats(dupe): starting...');
348 my $start_time = &timeget();
350 &sqlSelectColHash( 'factoids', 'factoid_key,factoid_value', undef,
351 'WHERE factoid_value IS NOT NULL', 1 );
356 foreach $v ( keys %hash ) {
357 my $count = scalar( keys %{ $hash{$v} } );
358 next if ( $count == 1 );
361 foreach ( keys %{ $hash{$v} } ) {
362 if ( $v =~ /^<REPLY> see /i ) {
367 s/([\,\;]+)/\037$1\037/g;
369 &WARN('dupe: _ = NULL. should never happen!.');
372 push( @sublist, $_ );
375 next unless ( scalar @sublist );
377 push( @list, join( ', ', @sublist ) );
380 &status("factstats(dupe): (good) dupe refs: $refs.");
381 my $delta_time = &timedelta($start_time);
383 sprintf( 'factstats(dupe): %.02f sec to complete', $delta_time ) )
384 if ( $delta_time > 0 );
386 # bail out on no results.
387 if ( scalar @list == 0 ) {
388 return 'no duplicate factoids... woohoo.';
392 my $prefix = 'dupe factoid ';
393 return &formListReply( 1, $prefix, @list );
396 elsif ( $type =~ /^nullfactoids$/i ) {
398 "SELECT factoid_key,factoid_value FROM factoids WHERE factoid_value=''";
399 my $sth = $dbh->prepare($query);
400 &ERROR("factstats(null): => '$query'.") unless $sth->execute;
403 while ( my @row = $sth->fetchrow_array ) {
404 if ( $row[1] ne '' ) {
405 &DEBUG("row[1] != NULL for $row[0].");
409 &DEBUG("row[0] => '$row[0]'.");
410 push( @list, $row[0] );
415 my $prefix = 'NULL factoids (not deleted yet) ';
416 return &formListReply( 1, $prefix, @list );
419 elsif ( $type =~ /^(2|too)short$/i ) {
421 # Custom select statement.
423 'SELECT factoid_key,factoid_value FROM factoids WHERE length(factoid_value) <= 40';
424 my $sth = $dbh->prepare($query);
425 &ERROR("factstats(lame): => '$query'.") unless $sth->execute;
428 while ( my @row = $sth->fetchrow_array ) {
429 my ( $key, $val ) = ( $row[0], $row[1] );
431 $match++ if ( $val =~ /\s{3,}/ );
432 next unless ($match);
434 my $v = &getFactoid($val);
436 &DEBUG("key $key => $val => $v");
439 $key =~ s/\,/\037\,\037/g;
445 my $prefix = 'Lame factoids ';
446 return &formListReply( 1, $prefix, @list );
449 elsif ( $type =~ /^listfix$/i ) {
451 # Custom select statement.
452 my $query = 'SELECT factoid_key,factoid_value FROM factoids';
453 my $sth = $dbh->prepare($query);
454 &ERROR("factstats(listfix): => '$query'.") unless $sth->execute;
457 while ( my @row = $sth->fetchrow_array ) {
458 my ( $key, $val ) = ( $row[0], $row[1] );
460 $match++ if ( $val =~ /\S+,? or \S+,? or \S+,? or \S+,?/ );
461 next unless ($match);
463 $key =~ s/\,/\037\,\037/g;
465 $val =~ s/,? or /, /g;
466 &DEBUG("fixed: => $val.");
467 &setFactInfo( $key, 'factoid_value', $val );
472 my $prefix = 'Inefficient lists fixed ';
473 return &formListReply( 1, $prefix, @list );
476 elsif ( $type =~ /^locked$/i ) {
477 my %hash = &sqlSelectColHash(
478 'factoids', 'factoid_key,locked_by',
479 undef, 'WHERE locked_by IS NOT NULL'
481 my @list = keys %hash;
484 s/([\,\;]+)/\037$1\037/g;
487 my $prefix = "factoid statistics on $type ";
488 return &formListReply( 0, $prefix, @list );
491 elsif ( $type =~ /^new$/i ) {
492 my %hash = &sqlSelectColHash(
493 'factoids', 'factoid_key,created_time',
494 undef, 'WHERE created_time IS NOT NULL'
498 foreach ( keys %hash ) {
499 my $created_time = $hash{$_};
500 my $delta_time = time() - $created_time;
501 next if ( $delta_time >= 60 * 60 * 24 );
503 $age{$delta_time}{$_} = 1;
506 if ( scalar keys %age == 0 ) {
507 return 'sorry, no new factoids.';
511 foreach ( sort { $a <=> $b } keys %age ) {
512 push( @list, join( ',', keys %{ $age{$_} } ) );
515 my $prefix = 'new factoids in the last 24hours ';
516 return &formListReply( 0, $prefix, @list );
519 elsif ( $type =~ /^part(ial)?dupe$/i ) {
520 ### requires 'custom' select statement... oh well...
521 my $start_time = &timeget();
523 # form length|key and key=length hash list.
524 &status('factstats(partdupe): forming length hash list.');
526 'SELECT factoid_key,factoid_value,length(factoid_value) AS length FROM factoids WHERE length(factoid_value) >= 192 ORDER BY length';
527 my $sth = $dbh->prepare($query);
528 &ERROR("factstats(partdupe): => '$query'.") unless $sth->execute;
531 my ( %key, %length );
532 while ( my @row = $sth->fetchrow_array ) {
533 $length{ $row[2] }{ $row[0] } = 1; # length(value)|key.
534 $key{ $row[0] } = $row[1]; # key=value.
535 push( @key, $row[0] );
538 &status( "factstats(partdupe): total keys => '" . scalar(@key) . "'." );
539 &status('factstats(partdupe): now deciphering data gathered');
541 my @length = sort { $a <=> $b } keys %length;
544 foreach $key (@key) {
545 shift @length if ( length $key{$key} == $length[0] );
547 my $val = quotemeta $key{$key};
550 foreach $length (@length) {
551 foreach ( keys %{ $length{$length} } ) {
552 if ( $key{$_} =~ /^$val/i ) {
553 s/([\,\;]+)/\037$1\037/g;
554 s/( and|and )/\037$1\037/g;
555 push( @sublist, $key . ' and ' . $_ );
559 push( @list, join( ' ,', @sublist ) ) if ( scalar @sublist );
562 my $delta_time = sprintf( '%.02fs', &timedelta($start_time) );
563 &status("factstats(partdupe): $delta_time sec to complete.")
564 if ( $delta_time > 0 );
566 # bail out on no results.
567 if ( scalar @list == 0 ) {
568 return 'no initial partial duplicate factoids... woohoo.';
572 my $prefix = 'initial partial dupe factoid ';
573 return &formListReply( 1, $prefix, @list );
576 elsif ( $type =~ /^profanity$/i ) {
577 my %data = &sqlSelectColHash(
578 'factoids', 'factoid_key,factoid_value',
579 undef, 'WHERE factoid_value IS NOT NULL'
583 foreach ( keys %data ) {
584 push( @list, $_ ) if ( &hasProfanity( $_ . ' ' . $data{$_} ) );
588 my $prefix = 'Profanity in factoids ';
589 return &formListReply( 1, $prefix, @list );
592 elsif ( $type =~ /^redir(ection)?$/i ) {
594 &searchTable( 'factoids', 'factoid_key', 'factoid_value',
602 my $val = &getFactInfo( $factoid, 'factoid_value' );
603 if ( $val =~ /^<REPLY> see( also)? (.*?)\.?$/i ) {
605 my $redirval = &getFactInfo( $redir, 'factoid_value' );
606 if ( defined $redirval ) {
607 $redir{$redir}{$factoid} = 1;
611 "factstats(redir): '$factoid' has loose link => '$redir'."
619 foreach $f ( keys %redir ) {
620 my @sublist = keys %{ $redir{$f} };
622 s/([\,\;]+)/\037$1\037/g;
625 push( @newlist, "$f => " . join( ', ', @sublist ) );
629 my $prefix = "Redirections in factoids, $dangling dangling ";
630 return &formListReply( 1, $prefix, @newlist );
633 elsif ( $type =~ /^request(ed)?$/i ) {
635 &sqlSelectColHash( 'factoids', 'factoid_key,requested_count', undef,
636 'WHERE requested_count IS NOT NULL', 1 );
638 if ( !scalar keys %hash ) {
639 return 'sorry, no factoids have been questioned.';
645 foreach $count ( sort { $b <=> $a } keys %hash ) {
646 my @faqtoids = sort keys %{ $hash{$count} };
649 s/([\,\;]+)/\037$1\037/g;
651 $total += $count * scalar(@faqtoids);
653 push( @list, "$count - " . join( ', ', @faqtoids ) );
655 unshift( @list, "\037$total - TOTAL\037" );
657 my $prefix = "factoid statistics on $type ";
658 return &formListReply( 0, $prefix, @list );
661 elsif ( $type =~ /^reqrate$/i ) {
662 my %hash = &sqlSelectColHash(
664 "factoid_key,(unix_timestamp() - created_time)/requested_count as rate",
666 'WHERE requested_by IS NOT NULL and created_time IS NOT NULL ORDER BY rate LIMIT 15',
674 foreach $rate ( sort { $b <=> $a } keys %hash ) {
675 my $f = join( ', ', sort keys %{ $hash{$rate} } );
676 my $str = "$f - " . &Time2String($rate);
681 my $prefix = "Rank of top factoid rate (time/req): ";
682 return &formListReply( 0, $prefix, @list );
685 elsif ( $type =~ /^requesters?$/i ) {
686 my %hash = &sqlSelectColHash(
687 'factoids', 'factoid_key,requested_by',
688 undef, 'WHERE requested_by IS NOT NULL'
692 foreach ( keys %hash ) {
693 my $thisnuh = $hash{$_};
695 $thisnuh =~ /^(\S+)!\S+@\S+$/;
696 $requester{ lc $1 }++;
699 if ( !scalar keys %requester ) {
700 return 'sorry, no factoids with requested_by field.';
705 foreach ( keys %requester ) {
706 $count{ $requester{$_} }{$_} = 1;
714 foreach $count ( sort { $b <=> $a } keys %count ) {
715 my $requester = join( ', ', sort keys %{ $count{$count} } );
716 $total += $count * scalar( keys %{ $count{$count} } );
717 $users += scalar( keys %{ $count{$count} } );
718 push( @list, "$count by $requester" );
721 "\037$total TOTAL REQUESTS; $users UNIQUE REQUESTERS\037" );
723 # should not the above value be the same as collected by
724 # 'requested'? soemthing weird is going on!
726 my $prefix = 'rank of top factoid requesters: ';
727 return &formListReply( 0, $prefix, @list );
730 elsif ( $type =~ /^seefix$/i ) {
732 &searchTable( 'factoids', 'factoid_key', 'factoid_value', '^see ' );
740 my $val = &getFactInfo( $factoid, 'factoid_value' );
742 next unless ( $val =~ /^see( also)? (.*?)\.?$/i );
745 my $redir = &getFactInfo( $redirf, 'factoid_value' );
747 if ( $redirf =~ /^\Q$factoid\W$/i ) {
748 &delFactoid($factoid);
752 if ( defined $redir ) { # good.
753 &setFactInfo( $factoid, 'factoid_value', "<REPLY> see $redir" );
757 push( @newlist, $redirf );
762 &msg( $who, "Fixed $fixed factoids." );
763 &msg( $who, 'Self looped factoids removed: ' . keys %loop )
764 if ( scalar keys %loop );
766 my $prefix = "Loose link (dead) redirections in factoids ";
767 return &formListReply( 1, $prefix, @newlist );
770 elsif ( $type =~ /^(2|too)long$/i ) {
776 "SELECT factoid_key FROM factoids WHERE length(factoid_key) >= $param{'maxKeySize'}";
777 my $sth = $dbh->prepare($query);
779 while ( my @row = $sth->fetchrow_array ) {
780 push( @list, $row[0] );
786 "SELECT factoid_key,factoid_value FROM factoids WHERE length(factoid_value) >= $param{'maxDataSize'}";
787 $sth = $dbh->prepare($query);
789 while ( my @row = $sth->fetchrow_array ) {
791 sprintf( "\002%s\002 - %s", length( $row[1] ), $row[0] ) );
795 if ( scalar @list == 0 ) {
796 return 'good. no factoids exceed length.';
800 my $prefix = 'factoid key||value exceeding length ';
801 return &formListReply( 1, $prefix, @list );
804 elsif ( $type =~ /^unrequest(ed)?$/i ) {
806 # TODO: use sqlSelect()
809 "SELECT COUNT(*) FROM factoids WHERE requested_count = '0'");
811 return "Unrequested factoids: $count";
814 return "error: invalid type => '$type'.";
819 my $maxshow = &::getChanConfDefault( 'maxListReplyCount', 15, $chan );
821 &searchTable( 'factoids', 'factoid_key', 'created_by', "^$query!" );
822 @list = grep( !/\#DEL\#$/, @list ) if ( scalar(@list) > $maxshow );
824 my $prefix = "factoid author list by '$query' ";
825 &performStrictReply( &formListReply( 1, $prefix, @list ) );
830 # vim:ts=4:sw=4:expandtab:tw=80