11 GetOptions("hard-threshold" => \$hard,
12 "soft-threshold" => \$soft,
13 "h|help" => \$help) or pod2usage(-exitval => 2, -verbose => 2);
15 pod2usage(-verbose => 2) if ($help == 1);
16 pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 3);
17 pod2usage(-msg => "--hard-threshold and --soft-threshold cannot be set at the same time!", -exitval => 2, -verbose => 2) if ($hard && $soft);
19 if ($hard == 0 && $soft == 0) { $hard = 1; }
23 open(INPUT, "$ARGV[0]");
24 open(OUTPUT, ">$ARGV[2]");
28 my @columns = split(/\t/, $header);
31 while ($pos <= $#columns && $columns[$pos] ne "\"PPDE\"") { ++$pos; }
32 if ($pos > $#columns) { print "Error: Cannot find column PPDE!\n"; exit(-1); }
35 print OUTPUT "$header\n";
37 my ($n, $sum) = (0, 0);
39 while($line = <INPUT>) {
41 my @fields = split(/\t/, $line);
42 my $ppee = 1.0 - $fields[$pos];
44 if ($ppee > $fdr) { last; }
46 print OUTPUT "$line\n";
49 if ($sum + $ppee > $fdr * ($n + 1)) { last; }
52 print OUTPUT "$line\n";
56 print "There are $n genes/transcripts reported at FDR = $fdr.\n";
69 rsem-control-fdr [options] input_file fdr_rate output_file
77 This should be the result file generated by 'rsem-run-ebseq', which contains all genes/transcripts and their associated statistics.
81 The desire false discovery rate (FDR).
85 This file is a subset of the 'input_file'. It only contains the genes/transcripts called as differentially expressed (DE). When more than 2 conditions exist, DE is defined as not all conditions are equally expressed.
93 =item B<--hard-threshold>
95 Use hard threshold method to control FDR. If this option is set, only those genes/transcripts with their PPDE >= 1 - fdr_rate are called as DE. (Default: on)
97 =item B<--soft-threshold>
99 Use soft threshold method to control FDR. If this option is set, this program will try to report as many genes/transcripts as possible, as long as their average PPDE >= 1 - fdr_rate. (Default: off)
103 Show help information.
109 This program controls the false discovery rate and reports differentially expressed genes/transcripts.
113 We assume that we have 'GeneMat.results' as input. We want to control FDR at 0.05 using hard threshold method and name the output file as 'GeneMat.de.txt':
115 rsem-control-fdr GeneMat.results 0.05 GeneMat.de.txt
117 If we instead want to use soft threshold method to obtain more called genes/transcripts, we can use:
119 rsem-control-fdr --soft-threshold GeneMat.results 0.05 GeneMat.de.txt