X-Git-Url: https://git.donarmstrong.com/?p=deb_pkgs%2Fscowl.git;a=blobdiff_plain;f=current%2Fsrc%2Fcount;h=6787028f2e19ec0f3d0d24119b0a6fa9fcd8a03f;hp=bd9f6df4974403c16c61307cf1592f0fc228b4bc;hb=7b14ba883fb1046508c44be37b4c6ba5da5feacf;hpb=b72d489091bf51f14f63db9bec15fe71fa52a395 diff --git a/current/src/count b/current/src/count index bd9f6df..6787028 100755 --- a/current/src/count +++ b/current/src/count @@ -1,10 +1,40 @@ -#!/bin/sh - -c=0 -for f in 10 20 35 40 50 55 60 70 80 95 -do - n=`cat final/english-{words,abbreviations,contractions}.$f 2> /dev/null | wc -l ` - p=`cat final/english-{proper-names,upper}.$f 2> /dev/null | wc -l` - c=`expr $c + $n + $p` - echo $f $n $p $c -done +#!/usr/bin/perl + +use strict; +use warnings; + +my @data; + +my $c = 0; +my $hc = 0; +foreach my $f (10,20,35,40,50,55,60,70,80,95) { + my $n = `bash -c 'cat final/{american,english}-{words,abbreviations,contractions}.$f 2> /dev/null | wc -l'` + 0; + my $p = `bash -c 'cat final/{american,english}-{proper-names,upper}.$f 2> /dev/null | wc -l'` + 0; + my $c0 = $c; + $c = $c + $n + $p; + my $h; + for (my $i = $c0+1; $i <= $c; ++$i) { + $h += 1/$i; + } + $hc += $h; + push @data, [$f, $n, $p, $c, $hc]; +} + +sub commify { + local $_ = shift; + 1 while s/^([-+]?\d+)(\d{3})/$1,$2/; + return $_; +} + + +#print " Size Words Names Running Total % Zipf's law\n"; +print " Size Words Names Running Total %\n"; + +foreach (@data) { + my ($f, $n, $p, $c0, $h) = @$_; + my $cp = 100*$c0/$c; + my $hp = 100*$h/$hc; + #printf(" %2d %7s %7s %7s %5.1f %6.2f\n", $f, commify($n), commify($p), commify($c0), $cp, $hp); + printf(" %2d %7s %7s %7s %5.1f\n", $f, commify($n), commify($p), commify($c0), $cp); + #print "$f $n $p $c0 $cp $hp\n"; +}