#!/usr/bin/perl use strict; #use warnings; #no warnings 'uninitialized'; # Usage: add-affixes [] [] my $inc_level = 0; if ($ARGV[0] =~ /^[\d.]+$/) { $inc_level = $ARGV[0]; shift @ARGV; } #print STDERR "Include Level: $inc_level\n"; my $use_all = 0; my $just_possessive = 0; my $no_possessive = 0; foreach (@ARGV) { if ($_ eq 'use-all') {$use_all = 2} elsif ($_ eq 'use-some') {$use_all = 1} elsif ($_ eq 'just-possessive') {$just_possessive = 1} elsif ($_ eq 'no-possessive') {$no_possessive = 1} else {die "ERROR: Invalid flag $_\n";} } open F, "r/alt12dicts/2of12id.txt" or die; my %lookup; my %remove; my %possessive; # possessive_cross = additional forms that should be looked up in the # possessive hash my %possessive_cross; while () { s/\r?\n$// or die; # (flags, base word, part of speach, infl forms) my ($d,$w,$p,$a) = /^([-@\+]?)(\w+) (.).*: ?(.*)$/ or die "Bad line: >$_<"; $d =~ tr/+//d; $possessive{$w} = "$w\'s\n" if $p eq 'N' && ($d eq '' || $use_all); next if $use_all; my @a = $a =~ /([-~@!\w]+)/g; @a = map {"$d$_"} @a if ($d); my (@a0,@a1); foreach (@a) {if (s/^[~!-]//) {push @a0, $_} else {push @a1, $_}} $remove{"$w:$p"} = 1 unless @a1; foreach (@a0) {$remove{"$w:$p:$_"} = 1} } # Maybe using AGID isn't a good idea here, many of the entries in AGID # are unchecked, for example "associationism" may be an uncountable # noun, but since the base entry is not in 2of12id it is not flagged # that way, thus the plural "associationisms" gets included. However, # AGID still needs to be used for uppercase words since they are not # in 2of12id. For now I won't worry about it since it primary effects # level 70 of SCOWL. open F, "r/infl/infl.txt" or die; while () { # (base word, part of speach, guess flag, infl forms) my ($w,$p,$q,$a) = /(\S+) (.)(.*): (.+)/ or die; # Add possive form if # AGID things it is a noun and "use-some" or Uppercase # (since 2of12id doesn't include uppercase) # AGIG is guessing it is a noun and "use-all" my $add_possessive = $p eq 'N' && (($q eq '' && ($use_all || $w =~ /^[A-Z]/)) || $use_all >= 2); $possessive{$w} = "$w\'s\n" if $add_possessive; next if $remove{"$w:$p"}; next unless $q eq '' || $use_all >= 2; my @a = split /, | \| /, $a; @a = grep {my ($word,$tags,$level) = /^([A-Za-z\']+)([~) { chop; delete $possessive{$_}; } open F, "working/possessive-also.lst" or die; while () { chop; $possessive{$_} = "$_\'s\n"; } } while () { print; chop; my $w = $_; print $lookup{$w} unless $just_possessive; print $possessive{$w} unless $no_possessive; unless ($just_possessive || $no_possessive) { foreach (@{$possessive_cross{$w}}) { print $possessive{$_} } } }