X-Git-Url: https://git.donarmstrong.com/?p=function2gene.git;a=blobdiff_plain;f=bin%2Fparse_harvester_results;h=e52066127ceab16107b04bfc68c919ee150adb86;hp=7a8cfd4a3ef37b40e7d63c36424d2e34c81ae59d;hb=50e9739109c91b53ab620b462255f71a0b870f95;hpb=dd8ec1d4cecf282940831171ef0f796570d781fd diff --git a/bin/parse_harvester_results b/bin/parse_harvester_results index 7a8cfd4..e520661 100755 --- a/bin/parse_harvester_results +++ b/bin/parse_harvester_results @@ -142,8 +142,16 @@ while ($_ = $dir->read) { ($results[NAME]) = $result =~ m&\s*Entry\s*name\s* \s*\s*([^<]+?)\s*\s*&xis; } + if (not defined $results[NAME]) { + ($results[NAME]) = $result =~ m{[^:]+:\s*[^\*]+\*[^\*]+\*\s*([^-]+)}xis; + $results[NAME] =~ s/\s*$// if defined $results[NAME]; + $results[NAME] =~ s/^\s*$// if defined $results[NAME]; + $results[NAME] =~ s/\d+\s*kDa\s*protein// if defined $results[NAME]; + $results[NAME] =~ s/\s*similar to .+// if defined $results[NAME]; + } $results[NAME] ||= 'NO NAME'; + $results[NAME] =~ s/_HUMAN//; # Find REF SEQ number ($results[REFSEQ]) = $result =~ m&<a\s+href="http://www.ncbi.nlm.nih.gov/entrez/