X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=rsem-calculate-expression;h=93fabce4686c8d0b7e4fafeae279c6262e7d412e;hb=a7af78eba4c832c3dc7ff8cbabbd7770bbd015cd;hp=0f22282dfc8f0b4ea259d057b7aca3ebdc2528ce;hpb=01ba1b045976306e502aaf784c4a1d0787f12ac5;p=rsem.git diff --git a/rsem-calculate-expression b/rsem-calculate-expression index 0f22282..93fabce 100755 --- a/rsem-calculate-expression +++ b/rsem-calculate-expression @@ -15,7 +15,7 @@ my $NSPC = 50; my $NMB = 1024; # default -my $status; +my $status = 0; my $read_type = 1; # default, single end with qual @@ -55,6 +55,9 @@ my $keep_intermediate_files = 0; my $strand_specific = 0; +my $mTime = 0; +my ($time_start, $time_end, $time_alignment, $time_rsem, $time_ci) = (0, 0, 0, 0, 0); + GetOptions("keep-intermediate-files" => \$keep_intermediate_files, "no-qualities" => \$no_qual, "paired-end" => \$paired_end, @@ -82,6 +85,7 @@ GetOptions("keep-intermediate-files" => \$keep_intermediate_files, "out-bam" => \$genBamF, "calc-ci" => \$calcCI, "ci-memory=i" => \$NMB, + "time" => \$mTime, "q|quiet" => \$quiet, "h|help" => \$help) or pod2usage(-exitval => 2, -verbose => 2); @@ -113,7 +117,7 @@ my $mate1_list = ""; my $mate2_list = ""; my $inpF = ""; -my ($refName, $taskName, $tmp_dir, $imdName) = (); +my ($refName, $sampleName, $sampleToken, $temp_dir, $stat_dir, $imdName) = (); my $gap = 32; if ($paired_end) { @@ -129,30 +133,31 @@ if (scalar(@ARGV) == 3) { if ($is_sam || $is_bam) { $inpF = $ARGV[0]; } else {$mate1_list = $ARGV[0]; } $refName = $ARGV[1]; - $taskName = $ARGV[2]; + $sampleName = $ARGV[2]; } else { $mate1_list = $ARGV[0]; $mate2_list = $ARGV[1]; $refName = $ARGV[2]; - $taskName = $ARGV[3]; + $sampleName = $ARGV[3]; } -$tmp_dir = $taskName.".temp"; -my $pos = rindex($taskName, '/'); -if ($pos < 0) { - $imdName = "$tmp_dir/$taskName"; -} -else { - $imdName = $tmp_dir."/".substr($taskName, $pos + 1); -} +my $pos = rindex($sampleName, '/'); +if ($pos < 0) { $sampleToken = $sampleName; } +else { $sampleToken = substr($sampleName, $pos + 1); } + +$temp_dir = "$sampleName.temp"; +$stat_dir = "$sampleName.stat"; + +if (!(-d $temp_dir) && !mkdir($temp_dir)) { print "Fail to create folder $temp_dir.\n"; exit(-1); } +if (!(-d $stat_dir) && !mkdir($stat_dir)) { print "Fail to create folder $stat_dir.\n"; exit(-1); } + +$imdName = "$temp_dir/$sampleToken"; if (!$is_sam && !$is_bam && $phred33 + $phred64 + $solexa == 0) { $phred33 = 1; } my ($mate_minL, $mate_maxL) = (1, $maxL); -if (!(-d $tmp_dir) && !mkdir($tmp_dir)) { print "Fail to create the directory.\n"; exit(-1); } - if ($bowtie_path ne "") { $bowtie_path .= "/"; } my ($fn, $dir, $suf) = fileparse($0); @@ -173,7 +178,7 @@ if (!$is_sam && !$is_bam) { if ($read_type == 2 || $read_type == 3) { $command .= " -I $minL -X $maxL"; } if ($strand_specific || $probF == 1.0) { $command .= " --norc"; } - elsif ($probF = 0.0) { $command .= " --nofw"; } + elsif ($probF == 0.0) { $command .= " --nofw"; } $command .= " -p $nThreads -a -m $maxHits -S"; if ($quiet) { $command .= " --quiet"; } @@ -188,7 +193,13 @@ if (!$is_sam && !$is_bam) { $command .= " | gzip > $imdName.sam.gz"; print "$command\n"; + + if ($mTime) { $time_start = time(); } + $status = system($command); + + if ($mTime) { $time_end = time(); $time_alignment = $time_end - $time_start; } + if ($status != 0) { print "bowtie failed! Please check if you provide correct parameters/options for the pipeline!\n"; exit(-1); @@ -199,7 +210,9 @@ if (!$is_sam && !$is_bam) { $is_sam = 1; # output of bowtie is a sam file } -$command = $dir."rsem-parse-alignments $refName $imdName"; +if ($mTime) { $time_start = time(); } + +$command = $dir."rsem-parse-alignments $refName $sampleName $sampleToken"; my $samInpType; if ($is_sam) { $samInpType = "s"; } @@ -233,8 +246,8 @@ if ($status != 0) { } print "\n"; -$status = open(OUTPUT, ">$imdName.mparams"); -if ($status == 0) { print "Cannot generate $imdName.mparams!\n"; exit(-1); } +my $doesOpen = open(OUTPUT, ">$imdName.mparams"); +if ($doesOpen == 0) { print "Cannot generate $imdName.mparams!\n"; exit(-1); } print OUTPUT "$minL $maxL\n"; print OUTPUT "$probF\n"; print OUTPUT "$estRSPD\n"; @@ -244,7 +257,7 @@ print OUTPUT "$mean $sd\n"; print OUTPUT "$L\n"; close(OUTPUT); -$command = $dir."rsem-run-em $refName $read_type $imdName $taskName -p $nThreads"; +$command = $dir."rsem-run-em $refName $read_type $sampleName $sampleToken -p $nThreads"; if ($genBamF) { $command .= " -b $samInpType $inpF"; if ($fn_list ne "") { $command .= " 1 $fn_list"; } @@ -262,7 +275,7 @@ if ($status != 0) { print "\n"; if ($genBamF) { - $command = $dir."sam/samtools sort $taskName.bam $taskName.sorted"; + $command = $dir."sam/samtools sort $sampleName.bam $sampleName.sorted"; print "$command\n"; $status = system($command); if ($status != 0) { @@ -270,7 +283,7 @@ if ($genBamF) { exit(-1); } print "\n"; - $command = $dir."sam/samtools index $taskName.sorted.bam"; + $command = $dir."sam/samtools index $sampleName.sorted.bam"; print "$command\n"; $status = system($command); if ($status != 0) { @@ -280,11 +293,16 @@ if ($genBamF) { print "\n"; } -&collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level -&collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level +&collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level +&collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level + +if ($mTime) { $time_end = time(); $time_rsem = $time_end - $time_start; } + +if ($mTime) { $time_start = time(); } if ($calcCI) { - $command = $dir."rsem-run-gibbs $refName $taskName $imdName $BURNIN $CHAINLEN $SAMPLEGAP"; + $command = $dir."rsem-run-gibbs $refName $sampleName $sampleToken $BURNIN $CHAINLEN $SAMPLEGAP"; +# $command .= " -p $nThreads"; if ($quiet) { $command .= " -q"; } print "$command\n"; $status = system($command); @@ -294,12 +312,12 @@ if ($calcCI) { } print "\n"; - system("mv $taskName.isoforms.results $imdName.isoforms.results.bak1"); - system("mv $taskName.genes.results $imdName.genes.results.bak1"); - &collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level - &collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level + system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak1"); + system("mv $sampleName.genes.results $imdName.genes.results.bak1"); + &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level + &collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level - $command = $dir."rsem-calculate-credibility-intervals $refName $taskName $imdName $CONFIDENCE $NSPC $NMB"; + $command = $dir."rsem-calculate-credibility-intervals $refName $sampleName $sampleToken $CONFIDENCE $NSPC $NMB"; if ($quiet) { $command .= " -q"; } print "$command\n"; $status = system($command); @@ -309,20 +327,36 @@ if ($calcCI) { } print "\n"; - system("mv $taskName.isoforms.results $imdName.isoforms.results.bak2"); - system("mv $taskName.genes.results $imdName.genes.results.bak2"); - &collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level - &collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level + system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak2"); + system("mv $sampleName.genes.results $imdName.genes.results.bak2"); + &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level + &collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level } +if ($mTime) { $time_end = time(); $time_ci = $time_end - $time_start; } + +if ($mTime) { $time_start = time(); } + if (!$keep_intermediate_files) { - $status = system ("rm -rf $tmp_dir"); + $status = system("rm -rf $temp_dir"); if ($status != 0) { print "Fail to delete the temporary folder!\n"; exit(-1); } } +if ($mTime) { $time_end = time(); } + +if ($mTime) { + open(OUTPUT, ">$sampleName.time"); + print OUTPUT "Alignment: $time_alignment s.\n"; + print OUTPUT "RSEM: $time_rsem s.\n"; + print OUTPUT "CI: $time_ci s.\n"; + my $time_del = $time_end - $time_start; + print OUTPUT "Delete: $time_del s.\n"; + close(OUTPUT); +} + # inpF, outF sub collectResults { my $local_status; @@ -582,11 +616,6 @@ file. If no other attributes are given or no GTF file is provided in 'rsem-prepare-reference', there will be no tab after the tau_value field. -=item B and B - -Output files used by RSEM internally for tasks like simulation, -compute credibility intervals etc. - =item B Only generated when --out-bam is specified. @@ -604,6 +633,9 @@ representing the posterior probability. 'sample_name.sorted.bam' and 'sample_name.sorted.bam.bai' are the sorted BAM file and indices generated by samtools (included in RSEM package). +=item B + +This is a folder instead of a file. All model related statistics are stored in this folder. Use 'rsem-plot-model' can generate plots using this folder. =back @@ -661,3 +693,5 @@ Assume the path to the bowtie executables is in the user's PATH environment vari mmliver_paired_end_quals =cut + +# LocalWords: usr