X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=rsem-calculate-expression;h=06604d96a8c415b4eaeaf9e74854d6bf772a8d17;hb=de840718636408cffb3ac1b614bb26a5ad5dec91;hp=3539fa498e76e4c902caa87fdb242433207f65ae;hpb=a97cc1d4f0111f7fe523227412a2147f7a763d56;p=rsem.git diff --git a/rsem-calculate-expression b/rsem-calculate-expression index 3539fa4..06604d9 100755 --- a/rsem-calculate-expression +++ b/rsem-calculate-expression @@ -113,7 +113,7 @@ my $mate1_list = ""; my $mate2_list = ""; my $inpF = ""; -my ($refName, $taskName, $tmp_dir, $imdName) = (); +my ($refName, $sampleName, $sampleToken, $temp_dir, $stat_dir, $imdName) = (); my $gap = 32; if ($paired_end) { @@ -129,30 +129,31 @@ if (scalar(@ARGV) == 3) { if ($is_sam || $is_bam) { $inpF = $ARGV[0]; } else {$mate1_list = $ARGV[0]; } $refName = $ARGV[1]; - $taskName = $ARGV[2]; + $sampleName = $ARGV[2]; } else { $mate1_list = $ARGV[0]; $mate2_list = $ARGV[1]; $refName = $ARGV[2]; - $taskName = $ARGV[3]; + $sampleName = $ARGV[3]; } -$tmp_dir = $taskName.".temp"; -my $pos = rindex($taskName, '/'); -if ($pos < 0) { - $imdName = "$tmp_dir/$taskName"; -} -else { - $imdName = $tmp_dir."/".substr($taskName, $pos + 1); -} +my $pos = rindex($sampleName, '/'); +if ($pos < 0) { $sampleToken = $sampleName; } +else { $sampleToken = substr($sampleName, $pos + 1); } + +$temp_dir = "$sampleName.temp"; +$stat_dir = "$sampleName.stat"; + +if (!(-d $temp_dir) && !mkdir($temp_dir)) { print "Fail to create folder $temp_dir.\n"; exit(-1); } +if (!(-d $stat_dir) && !mkdir($stat_dir)) { print "Fail to create folder $stat_dir.\n"; exit(-1); } + +$imdName = "$temp_dir/$sampleToken"; if (!$is_sam && !$is_bam && $phred33 + $phred64 + $solexa == 0) { $phred33 = 1; } my ($mate_minL, $mate_maxL) = (1, $maxL); -if (!(-d $tmp_dir) && !mkdir($tmp_dir)) { print "Fail to create the directory.\n"; exit(-1); } - if ($bowtie_path ne "") { $bowtie_path .= "/"; } my ($fn, $dir, $suf) = fileparse($0); @@ -173,7 +174,7 @@ if (!$is_sam && !$is_bam) { if ($read_type == 2 || $read_type == 3) { $command .= " -I $minL -X $maxL"; } if ($strand_specific || $probF == 1.0) { $command .= " --norc"; } - elsif ($probF = 0.0) { $command .= " --nofw"; } + elsif ($probF == 0.0) { $command .= " --nofw"; } $command .= " -p $nThreads -a -m $maxHits -S"; if ($quiet) { $command .= " --quiet"; } @@ -199,7 +200,7 @@ if (!$is_sam && !$is_bam) { $is_sam = 1; # output of bowtie is a sam file } -$command = $dir."rsem-parse-alignments $refName $imdName"; +$command = $dir."rsem-parse-alignments $refName $sampleName $sampleToken"; my $samInpType; if ($is_sam) { $samInpType = "s"; } @@ -244,7 +245,7 @@ print OUTPUT "$mean $sd\n"; print OUTPUT "$L\n"; close(OUTPUT); -$command = $dir."rsem-run-em $refName $read_type $imdName $taskName -p $nThreads"; +$command = $dir."rsem-run-em $refName $read_type $sampleName $sampleToken -p $nThreads"; if ($genBamF) { $command .= " -b $samInpType $inpF"; if ($fn_list ne "") { $command .= " 1 $fn_list"; } @@ -262,7 +263,7 @@ if ($status != 0) { print "\n"; if ($genBamF) { - $command = $dir."sam/samtools sort $taskName.bam $taskName.sorted"; + $command = $dir."sam/samtools sort $sampleName.bam $sampleName.sorted"; print "$command\n"; $status = system($command); if ($status != 0) { @@ -270,7 +271,7 @@ if ($genBamF) { exit(-1); } print "\n"; - $command = $dir."sam/samtools index $taskName.sorted.bam"; + $command = $dir."sam/samtools index $sampleName.sorted.bam"; print "$command\n"; $status = system($command); if ($status != 0) { @@ -280,11 +281,12 @@ if ($genBamF) { print "\n"; } -&collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level -&collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level +&collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level +&collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level if ($calcCI) { - $command = $dir."rsem-run-gibbs $refName $taskName $imdName $BURNIN $CHAINLEN $SAMPLEGAP"; + $command = $dir."rsem-run-gibbs $refName $sampleName $sampleToken $BURNIN $CHAINLEN $SAMPLEGAP"; +# $command .= " -p $nThreads"; if ($quiet) { $command .= " -q"; } print "$command\n"; $status = system($command); @@ -294,12 +296,12 @@ if ($calcCI) { } print "\n"; - system("mv $taskName.isoforms.results $imdName.isoforms.results.bak1"); - system("mv $taskName.genes.results $imdName.genes.results.bak1"); - &collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level - &collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level + system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak1"); + system("mv $sampleName.genes.results $imdName.genes.results.bak1"); + &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level + &collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level - $command = $dir."rsem-calculate-credibility-intervals $refName $taskName $imdName $CONFIDENCE $NSPC $NMB"; + $command = $dir."rsem-calculate-credibility-intervals $refName $sampleName $sampleToken $CONFIDENCE $NSPC $NMB"; if ($quiet) { $command .= " -q"; } print "$command\n"; $status = system($command); @@ -309,14 +311,14 @@ if ($calcCI) { } print "\n"; - system("mv $taskName.isoforms.results $imdName.isoforms.results.bak2"); - system("mv $taskName.genes.results $imdName.genes.results.bak2"); - &collectResults("$imdName.iso_res", "$taskName.isoforms.results"); # isoform level - &collectResults("$imdName.gene_res", "$taskName.genes.results"); # gene level + system("mv $sampleName.isoforms.results $imdName.isoforms.results.bak2"); + system("mv $sampleName.genes.results $imdName.genes.results.bak2"); + &collectResults("$imdName.iso_res", "$sampleName.isoforms.results"); # isoform level + &collectResults("$imdName.gene_res", "$sampleName.genes.results"); # gene level } if (!$keep_intermediate_files) { - $status = system ("rm -rf $tmp_dir"); + $status = system ("rm -rf $temp_dir"); if ($status != 0) { print "Fail to delete the temporary folder!\n"; exit(-1); @@ -344,7 +346,7 @@ sub collectResults { ++$cnt; chomp($line); my @local_arr = split(/\t/, $line); - if ($cnt == 3) { @comment = @local_arr; } + if ($cnt == 4) { @comment = @local_arr; } else { push(@results, \@local_arr); } } @@ -396,7 +398,7 @@ Comma-separated list of files containing downstream reads which are paired with =item B -SAM/BAM formatted input file. If "-" is specified for the filename, SAM/BAM input is instead assumed to come from standard input. SAM/BAM format used is SAM Spec v1.2. RSEM requires all alignments of the same read group together. For paired-end reads, RSEM also requires the two mates of any alignment be adjacent. See Description section for how to make input file obey RSEM's requirements. +SAM/BAM formatted input file. If "-" is specified for the filename, SAM/BAM input is instead assumed to come from standard input. RSEM requires all alignments of the same read group together. For paired-end reads, RSEM also requires the two mates of any alignment be adjacent. See Description section for how to make input file obey RSEM's requirements. =item B @@ -538,7 +540,7 @@ One simple way to make the alignment file (e.g. input.sam) satisfying RSEM's req sort -k 1,1 -s input.sam > input.sorted.sam -The SAM/BAM format RSEM uses is v1.2. +The SAM/BAM format RSEM uses is v1.3. However, it is compatible with old SAM/BAM format. The user must run 'rsem-prepare-reference' with the appropriate reference before using this program. @@ -582,11 +584,6 @@ file. If no other attributes are given or no GTF file is provided in 'rsem-prepare-reference', there will be no tab after the tau_value field. -=item B and B - -Output files used by RSEM internally for tasks like simulation, -compute credibility intervals etc. - =item B Only generated when --out-bam is specified. @@ -604,6 +601,9 @@ representing the posterior probability. 'sample_name.sorted.bam' and 'sample_name.sorted.bam.bai' are the sorted BAM file and indices generated by samtools (included in RSEM package). +=item B + +This is a folder instead of a file. All model related statistics are stored in this folder. Use 'rsem-plot-model' can generate plots using this folder. =back @@ -661,3 +661,5 @@ Assume the path to the bowtie executables is in the user's PATH environment vari mmliver_paired_end_quals =cut + +# LocalWords: usr