X-Git-Url: https://git.donarmstrong.com/?p=deb_pkgs%2Fscowl.git;a=blobdiff_plain;f=current%2Fsrc%2Fadd-affixes;fp=current%2Fsrc%2Fadd-affixes;h=0000000000000000000000000000000000000000;hp=a84a8155c993fe8354e452d777340ab90e2808ff;hb=b13ea8a082364672c6de2b010e558211ff52ec9a;hpb=01534a94130c1f5a3a230cf4fe18365a235ba271 diff --git a/current/src/add-affixes b/current/src/add-affixes deleted file mode 100755 index a84a815..0000000 --- a/current/src/add-affixes +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/perl - -use strict; -#use warnings; -#no warnings 'uninitialized'; - -# Usage: add-affixes [] [] - -my $inc_level = 0; -if ($ARGV[0] =~ /^[\d.]+$/) { - $inc_level = $ARGV[0]; - shift @ARGV; -} - -#print STDERR "Include Level: $inc_level\n"; - -my $use_all = 0; -my $just_possessive = 0; -my $no_possessive = 0; -foreach (@ARGV) { - if ($_ eq 'use-all') {$use_all = 2} - elsif ($_ eq 'use-some') {$use_all = 1} - elsif ($_ eq 'just-possessive') {$just_possessive = 1} - elsif ($_ eq 'no-possessive') {$no_possessive = 1} - else {die "ERROR: Invalid flag $_\n";} -} - -open F, "r/alt12dicts/2of12id.txt" or die; - -my %lookup; -my %remove; -my %possessive; - -while () { - s/\r?\n$// or die; - # (flags, base word, part of speach, infl forms) - my ($d,$w,$p,$a) = /^([-@]?)(\w+) (.).*: ?(.*)$/ or die; - $possessive{$w} = "$w\'s\n" if $p eq 'N' && ($d eq '' || $use_all); - next if $use_all; - my @a = $a =~ /([-~@\w]+)/g; - @a = map {"$d$_"} @a if ($d); - my (@a0,@a1); - foreach (@a) {if (s/^[~-]//) {push @a0, $_} else {push @a1, $_}} - $remove{"$w:$p"} = 1 unless @a1; - foreach (@a0) {$remove{"$w:$p:$_"} = 1} -} - -# Maybe using AGID isn't a good idea here, many of the entries in AGID -# are unchecked, for example "associationism" may be an uncountable -# noun, but since the base entry is not in 2of12id it is not flagged -# that way, thus the plural "associationisms" gets included. However, -# AGID still needs to be used for uppercase words since they are not -# in 2of12id. For now I won't worry about it since it primary effects -# level 70 of SCOWL. - -open F, "r/infl/infl.txt" or die; - -while () { - # (base word, part of speach, guess flag, infl forms) - my ($w,$p,$q,$a) = /(\S+) (.)(.*): (.+)/ or die; - # Add possive form if - # AGID things it is a noun and "use-some" or Uppercase - # (since 2of12id doesn't include uppercase) - # AGIG is guessing it is a noun and "use-all" - $possessive{$w} = "$w\'s\n" if $p eq 'N' && (($q eq '' && ($use_all || $w =~ /^[A-Z]/)) - || $use_all >= 2); - next if $remove{"$w:$p"}; - next unless $q eq '' || $use_all >= 2; - my @a = split /, | \| /, $a; - @a = grep {my ($word,$tags,$level) - = /^([A-Za-z\']+)([~) { - chop; - delete $possessive{$_}; - } - - open F, "working/possessive-also.lst" or die; - - while () { - chop; - $possessive{$_} = "$_\'s\n"; - } -} - -while () { - print; - chop; - print $lookup{$_} unless $just_possessive; - print $possessive{$_} unless $no_possessive; -}