From: Bo Li Date: Thu, 5 Jun 2014 07:20:01 +0000 (-0500) Subject: Generated two index multi-Fasta files instead, one without 'N' converted to 'G' and... X-Git-Url: https://git.donarmstrong.com/?p=rsem.git;a=commitdiff_plain;h=ca00325bea501441a98ecc860e2914c4f297d3d6 Generated two index multi-Fasta files instead, one without 'N' converted to 'G' and one with 'N' converted to 'G'. --- diff --git a/preRef.cpp b/preRef.cpp index 7518ec4..da6c84e 100644 --- a/preRef.cpp +++ b/preRef.cpp @@ -23,18 +23,17 @@ PolyARules rules; Refs refs; ofstream fout; -char refF[STRLEN], alignerFastaF[STRLEN], transF[STRLEN]; +char refF[STRLEN], idxF[STRLEN], n2g_idxF[STRLEN]; int polyAChoice, polyALen; char exceptionF[STRLEN]; -bool ntog; // true , change N into G; false do not change. Default is true. bool quiet; // verbose = !quiet; // always generate references for aligners, default convert all N into G int main(int argc, char* argv[]) { if (argc < 4) { - printf("USAGE : rsem-preref refFastaF polyAChoice refName [-l polyALen] [-f exceptionF] [--no-ntog] [-q]\n\n"); + printf("USAGE : rsem-preref refFastaF polyAChoice refName [-l polyALen] [-f exceptionF] [-q]\n\n"); printf(" refFastaF: a FASTA format file contains all reference transcripts\n"); printf(" polyAChoice: choice for polyA tail padding.It is a number from {0,1,2}\n"); printf(" 0: pad polyA tail\n"); @@ -42,9 +41,7 @@ int main(int argc, char* argv[]) { printf(" 2: pad polyA tail for all references but those in exceptionF\n"); printf(" -l: polyALen: specify the length of polyA tail you want to pad. Default is 100\n"); printf(" -f: exceptionF: file contains a list of exception reference ids. IDs starts from 1. Must set if polyAChoice = 2\n"); - printf(" --no-ntog: do not convert N in references into G\n"); printf(" -q: quiet\n"); - printf(" This program will generate a file named \"refName.transcripts.fa\", which may rewrite an existing file (e.g. refFastaF).\n"); exit(-1); } @@ -58,7 +55,6 @@ int main(int argc, char* argv[]) { for (int i = 4; i < argc; i++) { if (!strcmp(argv[i], "-l")) { polyALen = atoi(argv[i + 1]); } if (!strcmp(argv[i], "-f")) { strcpy(exceptionF, argv[i + 1]); } - if (!strcmp(argv[i], "--no-ntog")) { ntog = false; } if (!strcmp(argv[i], "-q")) { quiet = true; } } @@ -73,21 +69,21 @@ int main(int argc, char* argv[]) { sprintf(refF, "%s.seq", argv[3]); refs.saveRefs(refF); - sprintf(transF, "%s.transcripts.fa", argv[3]); - fout.open(transF); + sprintf(idxF, "%s.idx.fa", argv[3]); + fout.open(idxF); for (int i = 1; i <= M; i++) { fout<< ">"<< refs.getRef(i).getName()<< endl<< refs.getRef(i).getSeq()<< endl; } fout.close(); - if (verbose) printf("%s is generated!\n", transF); + if (verbose) printf("%s is generated!\n", idxF); - sprintf(alignerFastaF, "%s.idx.fa", argv[3]); - fout.open(alignerFastaF); + sprintf(n2g_idxF, "%s.n2g.idx.fa", argv[3]); + fout.open(ng2_idxF); for (int i = 1; i <= M; i++) { - fout<<">"<"<< refs.getRef(i).getName()<< endl<< aligner_refp.convert(refs.getRef(i).getSeq())<< endl; } fout.close(); - if (verbose) printf("%s is generated!\n", alignerFastaF); + if (verbose) printf("%s is generated!\n", n2g_idxF); return 0; } diff --git a/rsem-prepare-reference b/rsem-prepare-reference index a74d97d..177cc43 100755 --- a/rsem-prepare-reference +++ b/rsem-prepare-reference @@ -19,9 +19,9 @@ my $polyAChoice = 0; # 0, default, pad polyA tails for all isoforms; 1, --no-pol my $no_polyA = 0; # for option --no-polyA my $subsetFile = ""; my $polyALen = 125; +my $ntog = 0; +my $bowtie = 0; my $bowtie_path = ""; -my $no_bowtie = 0; -my $no_ntog = 0; my $bowtie2 = 0; my $bowtie2_path = ""; my $quiet = 0; @@ -35,9 +35,9 @@ GetOptions("gtf=s" => \$gtfF, "no-polyA" => \$no_polyA, "no-polyA-subset=s" => \$subsetFile, "polyA-length=i" => \$polyALen, + "ntog" => \$no_ntog, + "bowtie" => \$bowtie, "bowtie-path=s" => \$bowtie_path, - "no-bowtie" => \$no_bowtie, - "no-ntog" => \$no_ntog, "bowtie2" => \$bowtie2, "bowtie2-path=s" => \$bowtie2_path, "q|quiet" => \$quiet, @@ -49,12 +49,8 @@ pod2usage(-msg => "--transcript-to-gene-map and --allele-to-gene-map are mutuall pod2usage(-msg => "--gtf and --allele-to-gene-map are mutually exclusive!", -exitval => 2, -verbose => 2) if (($gtfF ne "") && ($alleleMappingF ne "")); pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 2); -if ($bowtie2) { $no_bowtie = 1; $no_ntog = 1; } - -pod2usage(-msg => "If bowtie is used, --no-ntog cannot be set!", -exitval => 2, -verbose => 2) if (!$no_bowtie && $no_ntog); - -if ($no_bowtie && ($bowtie_path ne "")) { print "Warning: If bowtie is not used, no need to set --bowtie-path option!\n"; } -if (!$bowtie2 && ($bowtie2_path ne "")) { print "Warning: If bowtie2 is not used, no need to set --bowtie2-path option!\n"; } +if (!$bowtie && ($bowtie_path ne "")) { print "Warning: If Bowtie is not used, no need to set --bowtie-path option!\n"; } +if (!$bowtie2 && ($bowtie2_path ne "")) { print "Warning: If Bowtie 2 is not used, no need to set --bowtie2-path option!\n"; } my $type;