X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=rsem-prepare-reference;h=177cc43c83e35fa8e3f4bc3dbd475cc964a6016f;hb=ca00325bea501441a98ecc860e2914c4f297d3d6;hp=81fd13ea74edab4b31dd147db9be502e62321167;hpb=66a1547e5d549a915cefb729222be915b87fb34d;p=rsem.git diff --git a/rsem-prepare-reference b/rsem-prepare-reference index 81fd13e..177cc43 100755 --- a/rsem-prepare-reference +++ b/rsem-prepare-reference @@ -1,13 +1,16 @@ -#!/usr/bin/perl +#!/usr/bin/env perl use Getopt::Long; use Pod::Usage; use FindBin; -use lib $FindBin::Bin; -use strict; - +use lib $FindBin::RealBin; use rsem_perl_utils; +use Env qw(@PATH); +push(@PATH, $FindBin::RealBin); + +use strict; + my $status; my $gtfF = ""; @@ -16,22 +19,25 @@ my $polyAChoice = 0; # 0, default, pad polyA tails for all isoforms; 1, --no-pol my $no_polyA = 0; # for option --no-polyA my $subsetFile = ""; my $polyALen = 125; +my $ntog = 0; +my $bowtie = 0; my $bowtie_path = ""; -my $no_bowtie = 0; -my $no_ntog = 0; my $bowtie2 = 0; my $bowtie2_path = ""; my $quiet = 0; my $help = 0; +my $alleleMappingF = ""; + GetOptions("gtf=s" => \$gtfF, "transcript-to-gene-map=s" => \$mappingF, + "allele-to-gene-map=s" => \$alleleMappingF, "no-polyA" => \$no_polyA, "no-polyA-subset=s" => \$subsetFile, "polyA-length=i" => \$polyALen, + "ntog" => \$no_ntog, + "bowtie" => \$bowtie, "bowtie-path=s" => \$bowtie_path, - "no-bowtie" => \$no_bowtie, - "no-ntog" => \$no_ntog, "bowtie2" => \$bowtie2, "bowtie2-path=s" => \$bowtie2_path, "q|quiet" => \$quiet, @@ -39,14 +45,12 @@ GetOptions("gtf=s" => \$gtfF, pod2usage(-verbose => 2) if ($help == 1); pod2usage(-msg => "Set --no-polyA & --no-polyA-subset at the same time!", -exitval => 2, -verbose => 2) if ($no_polyA == 1 && $subsetFile ne ''); +pod2usage(-msg => "--transcript-to-gene-map and --allele-to-gene-map are mutually exclusive!", -exitval => 2, -verbose => 2) if (($mappingF ne "") && ($alleleMappingF ne "")); +pod2usage(-msg => "--gtf and --allele-to-gene-map are mutually exclusive!", -exitval => 2, -verbose => 2) if (($gtfF ne "") && ($alleleMappingF ne "")); pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 2); -if ($bowtie2) { $no_bowtie = 1; $no_ntog = 1; } - -pod2usage(-msg => "If bowtie is used, --no-ntog cannot be set!", -exitval => 2, -verbose => 2) if (!$no_bowtie && $no_ntog); - -if ($no_bowtie && ($bowtie_path ne "")) { print "Warning: If bowtie is not used, no need to set --bowtie-path option!\n"; } -if (!$bowtie2 && ($bowtie2_path ne "")) { print "Warning: If bowtie2 is not used, no need to set --bowtie2-path option!\n"; } +if (!$bowtie && ($bowtie_path ne "")) { print "Warning: If Bowtie is not used, no need to set --bowtie-path option!\n"; } +if (!$bowtie2 && ($bowtie2_path ne "")) { print "Warning: If Bowtie 2 is not used, no need to set --bowtie2-path option!\n"; } my $type; @@ -70,12 +74,11 @@ elsif ($subsetFile ne "") { $polyAChoice = 2; } if ($bowtie_path ne "") { $bowtie_path .= "/"; } if ($bowtie2_path ne "") { $bowtie2_path .= "/"; } -my $dir = "$FindBin::Bin/"; my $command = ""; if ($type == 0) { $"=" "; - $command = $dir."rsem-extract-reference-transcripts $ARGV[1] $quiet $gtfF"; + $command = "rsem-extract-reference-transcripts $ARGV[1] $quiet $gtfF"; if ($mappingF ne "") { $command .= " 1 $mappingF"; } else { $command .= " 0"; } $command .= " @list"; @@ -83,14 +86,15 @@ if ($type == 0) { } else { $"=" "; - $command = $dir."rsem-synthesis-reference-transcripts $ARGV[1] $quiet"; + $command = "rsem-synthesis-reference-transcripts $ARGV[1] $quiet"; if ($mappingF ne "") { $command .= " 1 $mappingF"; } + elsif ($alleleMappingF ne "") { $command .= " 2 $alleleMappingF"; } else { $command .= " 0"; } $command .= " @list"; &runCommand($command); } -$command = $dir."rsem-preref $ARGV[1].transcripts.fa $polyAChoice $ARGV[1] -l $polyALen"; +$command = "rsem-preref $ARGV[1].transcripts.fa $polyAChoice $ARGV[1] -l $polyALen"; if ($polyAChoice == 2) { $command .= " -f $subsetFile"; } if ($no_ntog) { $command .= " --no-ntog"; } if ($quiet) { $command .= " -q"; } @@ -164,6 +168,19 @@ If this option is off, then the mapping of isoforms to genes depends on whether (Default: off) +=item B<--allele-to-gene-map> + +Use information from to provide gene_id and transcript_id information for each allele-specific transcript. +Each line of should be of the form: + +gene_id transcript_id allele_id + +with the fields separated by a tab character. + +This option is designed for quantifying allele-specific expression. It is only valid if '--gtf' option is not specified. allele_id should be the sequence names presented in the FASTA-formatted files. + +(Default: off) + =item B<--no-polyA> Do not add poly(A) tails to the end of reference isoforms. (Default: adding poly(A) tails to all transcripts)