X-Git-Url: https://git.donarmstrong.com/?p=function2gene.git;a=blobdiff_plain;f=bin%2Fparse_ensembl_results;h=df20b6c541568e01eca5b261a124dba9b4cfa5ba;hp=cdd1fbacd7cd6905e29ae59da8c3677486c2e3e7;hb=029f32c46252899b050634e52c46a0e15505bc47;hpb=b09fb9f692a51f9f0e6451a32cf4b869566dbec8 diff --git a/bin/parse_ensembl_results b/bin/parse_ensembl_results index cdd1fba..df20b6c 100755 --- a/bin/parse_ensembl_results +++ b/bin/parse_ensembl_results @@ -107,7 +107,7 @@ if ($options{keywords}) { if (@ARGV != 1) { pod2usage("If the --keywords option is used, exactly one argument (the keyword) must be passed"); } - $options{dir} = "$ARGV[0]_results_genecard"; + $options{dir} = "$ARGV[0]_results_ensembl"; } if (not -d $options{dir}) { @@ -118,7 +118,7 @@ my $dir = new IO::Dir $options{dir} or die "Unable to open dir $options{dir}: $! print join(",", map {qq("$_");} qw(Name RefSeq Location Alias Function Description Keyword DBName Filename)),qq(\n); -my ($keyword) = $options{keyword} || $options{dir} =~ m#(?:^|/)([^\/]+)_results_genecard#; +my ($keyword) = $options{keyword} || $options{dir} =~ m#(?:^|/)([^\/]+)_results_ensembl#; while ($_ = $dir->read) { my $file_name = $_; @@ -134,9 +134,11 @@ while ($_ = $dir->read) { my @results; # Find gene name - ($results[NAME]) = map {s/^[^:]+://; $_;}$result =~ m{a\s+href=\"[^"]+genenames.org[^"]+">\s*([^<]+?)\s*}xis; + ($results[NAME]) = $result =~ m{a\s+href=\"[^"]+genenames.org[^"]+">\s*([^<]+?)\s*}xis; $results[NAME] ||= 'NO NAME'; + # strip of leading : bits + $results[NAME] =~ s/^[^\:]+\://; # Find REF SEQ number ($results[REFSEQ]) = $result =~ m{for\s*(ENSG\d+)}xis;