X-Git-Url: https://git.donarmstrong.com/?p=deb_pkgs%2Fscowl.git;a=blobdiff_plain;f=current%2Fsrc%2Fadd-affixes;h=a84a8155c993fe8354e452d777340ab90e2808ff;hp=caee13adbd28e8acc2ee3bec839c2540ae22f5be;hb=7b14ba883fb1046508c44be37b4c6ba5da5feacf;hpb=b72d489091bf51f14f63db9bec15fe71fa52a395 diff --git a/current/src/add-affixes b/current/src/add-affixes index caee13a..a84a815 100755 --- a/current/src/add-affixes +++ b/current/src/add-affixes @@ -1,38 +1,72 @@ #!/usr/bin/perl -$inc_level = 0; -if ($ARGV[0] ne '') { +use strict; +#use warnings; +#no warnings 'uninitialized'; + +# Usage: add-affixes [] [] + +my $inc_level = 0; +if ($ARGV[0] =~ /^[\d.]+$/) { $inc_level = $ARGV[0]; + shift @ARGV; } -print STDERR "Include Level: $inc_level\n"; +#print STDERR "Include Level: $inc_level\n"; -if ($ARGV[1] eq 'use-all') { +my $use_all = 0; +my $just_possessive = 0; +my $no_possessive = 0; +foreach (@ARGV) { + if ($_ eq 'use-all') {$use_all = 2} + elsif ($_ eq 'use-some') {$use_all = 1} + elsif ($_ eq 'just-possessive') {$just_possessive = 1} + elsif ($_ eq 'no-possessive') {$no_possessive = 1} + else {die "ERROR: Invalid flag $_\n";} +} - print STDERR "Skipping Remove List\n"; +open F, "r/alt12dicts/2of12id.txt" or die; -} else { - - open F, "r/alt12dicts/2of12id.txt" or die; - - while () { - s/\r?\n$// or die; - ($d,$w,$p,$a) = /^(\-?)(\w+) (.).*: ?(.*)$/ or die; - my @a = $a =~ /([~@-]*\w+)/g; - @a = map {"$d$_"} @a if ($d); - my (@a0,@a1); - foreach (@a) {if (s/^[~-]//) {push @a0, $_} else {push @a1, $_}} - $remove{"$w:$p"} = 1 unless @a1; - foreach (@a0) {$remove{"$w:$p:$_"} = 1} - } +my %lookup; +my %remove; +my %possessive; + +while () { + s/\r?\n$// or die; + # (flags, base word, part of speach, infl forms) + my ($d,$w,$p,$a) = /^([-@]?)(\w+) (.).*: ?(.*)$/ or die; + $possessive{$w} = "$w\'s\n" if $p eq 'N' && ($d eq '' || $use_all); + next if $use_all; + my @a = $a =~ /([-~@\w]+)/g; + @a = map {"$d$_"} @a if ($d); + my (@a0,@a1); + foreach (@a) {if (s/^[~-]//) {push @a0, $_} else {push @a1, $_}} + $remove{"$w:$p"} = 1 unless @a1; + foreach (@a0) {$remove{"$w:$p:$_"} = 1} } +# Maybe using AGID isn't a good idea here, many of the entries in AGID +# are unchecked, for example "associationism" may be an uncountable +# noun, but since the base entry is not in 2of12id it is not flagged +# that way, thus the plural "associationisms" gets included. However, +# AGID still needs to be used for uppercase words since they are not +# in 2of12id. For now I won't worry about it since it primary effects +# level 70 of SCOWL. + open F, "r/infl/infl.txt" or die; while () { - ($w,$p,$a) = /(\S+) (.).*: (.+)/ or die; + # (base word, part of speach, guess flag, infl forms) + my ($w,$p,$q,$a) = /(\S+) (.)(.*): (.+)/ or die; + # Add possive form if + # AGID things it is a noun and "use-some" or Uppercase + # (since 2of12id doesn't include uppercase) + # AGIG is guessing it is a noun and "use-all" + $possessive{$w} = "$w\'s\n" if $p eq 'N' && (($q eq '' && ($use_all || $w =~ /^[A-Z]/)) + || $use_all >= 2); next if $remove{"$w:$p"}; - @a = split /, | \| /, $a; + next unless $q eq '' || $use_all >= 2; + my @a = split /, | \| /, $a; @a = grep {my ($word,$tags,$level) = /^([A-Za-z\']+)([~) { $lookup{$w} .= join("\n",@a)."\n"; } +unless ($no_possessive) { + + open F, "r/special/not-possessive" or die; + + while () { + chop; + delete $possessive{$_}; + } + + open F, "working/possessive-also.lst" or die; + + while () { + chop; + $possessive{$_} = "$_\'s\n"; + } +} + while () { print; chop; - print $lookup{$_}; + print $lookup{$_} unless $just_possessive; + print $possessive{$_} unless $no_possessive; }