X-Git-Url: https://git.donarmstrong.com/?p=function2gene.git;a=blobdiff_plain;f=bin%2Fparse_harvester_results;h=e52066127ceab16107b04bfc68c919ee150adb86;hp=29d07194a8a4e7c830fe7af55bdaa4d9e873a15d;hb=27d14c1110035a9e8425c100f587792517d1f19b;hpb=d09b67e0af77d6f2818e41d6b4d648cff651c79d diff --git a/bin/parse_harvester_results b/bin/parse_harvester_results index 29d0719..e520661 100755 --- a/bin/parse_harvester_results +++ b/bin/parse_harvester_results @@ -77,9 +77,12 @@ my %options = (debug => 0, man => 0, dir => '.', keyword => undef, + keywords => 0, ); -GetOptions(\%options,'keyword|k=s','dir|D=s','debug|d+','help|h|?','man|m'); +GetOptions(\%options,'keyword|k=s','dir|D=s','debug|d+','help|h|?','man|m', + 'keywords', + ); pod2usage() if $options{help}; @@ -99,6 +102,15 @@ use constant {NAME => 0, FILENAME => 8, }; +if ($options{keywords}) { + if (@ARGV != 1) { + pod2usage("If the --keywords option is used, exactly one argument (the keyword) must be passed"); + } + $options{dir} = "$ARGV[0]_results_harvester"; +} + + + if (not -d $options{dir}) { die "$options{dir} does not exist or is not a directory"; } @@ -130,8 +142,16 @@ while ($_ = $dir->read) { ($results[NAME]) = $result =~ m&\s*Entry\s*name\s* \s*\s*([^<]+?)\s*\s*&xis; } + if (not defined $results[NAME]) { + ($results[NAME]) = $result =~ m{[^:]+:\s*[^\*]+\*[^\*]+\*\s*([^-]+)}xis; + $results[NAME] =~ s/\s*$// if defined $results[NAME]; + $results[NAME] =~ s/^\s*$// if defined $results[NAME]; + $results[NAME] =~ s/\d+\s*kDa\s*protein// if defined $results[NAME]; + $results[NAME] =~ s/\s*similar to .+// if defined $results[NAME]; + } $results[NAME] ||= 'NO NAME'; + $results[NAME] =~ s/_HUMAN//; # Find REF SEQ number ($results[REFSEQ]) = $result =~ m&<a\s+href="http://www.ncbi.nlm.nih.gov/entrez/