From: westcott Date: Fri, 26 Feb 2010 15:08:15 +0000 (+0000) Subject: filter.seqs can now accept multiple fasta files X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=251a6e0e38cce0c439757ec39f0a84a5d0bd0222;p=mothur.git filter.seqs can now accept multiple fasta files --- diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index d45d916..91d1181 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -15,6 +15,7 @@ FilterSeqsCommand::FilterSeqsCommand(string option) { try { abort = false; + filterFileName = ""; //allow user to run help if(option == "help") { help(); abort = true; } @@ -58,16 +59,45 @@ FilterSeqsCommand::FilterSeqsCommand(string option) { } //check for required parameters - fastafile = validParameter.validFile(parameters, "fasta", true); - if (fastafile == "not found") { m->mothurOut("fasta is a required parameter for the filter.seqs command."); m->mothurOutEndLine(); abort = true; } - else if (fastafile == "not open") { abort = true; } - - //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ - outputDir = ""; - outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it - } + fasta = validParameter.validFile(parameters, "fasta", false); + if (fasta == "not found") { m->mothurOut("fasta is a required parameter for the filter.seqs command."); m->mothurOutEndLine(); abort = true; } + else { + splitAtDash(fasta, fastafileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < fastafileNames.size(); i++) { + if (inputDir != "") { + string path = hasPath(fastafileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. 
+ if (path == "") { fastafileNames[i] = inputDir + fastafileNames[i]; } + } + int ableToOpen; + ifstream in; + ableToOpen = openInputFile(fastafileNames[i], in); + if (ableToOpen == 1) { + m->mothurOut(fastafileNames[i] + " will be disregarded."); m->mothurOutEndLine(); + //erase from file list + fastafileNames.erase(fastafileNames.begin()+i); + i--; + }else{ + string simpleName = getSimpleName(fastafileNames[i]); + filterFileName += simpleName.substr(0, simpleName.find_first_of('.')); + } + in.close(); + } + + //make sure there is at least one valid file left + if (fastafileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; } + } + + if (!abort) { + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = ""; + outputDir += hasPath(fastafileNames[0]); //if user entered a file with a path then preserve it + } + } //check for optional parameter and set defaults // ...at some point should added some additional type checking... @@ -107,7 +137,8 @@ void FilterSeqsCommand::help(){ try { m->mothurOut("The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file.\n"); m->mothurOut("The filter.seqs command parameters are fasta, trump, soft, hard and vertical. \n"); - m->mothurOut("The fasta parameter is required.\n"); + m->mothurOut("The fasta parameter is required. You may enter several fasta files to build the filter from and filter, by separating their names with -'s.\n"); + m->mothurOut("For example: fasta=abrecovery.fasta-amazon.fasta \n"); m->mothurOut("The trump parameter .... The default is ...\n"); m->mothurOut("The soft parameter .... The default is ....\n"); m->mothurOut("The hard parameter .... 
The default is ....\n"); @@ -130,13 +161,14 @@ int FilterSeqsCommand::execute() { try { if (abort == true) { return 0; } + vector<string> outputNames; ifstream inFASTA; - openInputFile(fastafile, inFASTA); + openInputFile(fastafileNames[0], inFASTA); Sequence testSeq(inFASTA); alignmentLength = testSeq.getAlignLength(); - inFASTA.seekg(0); + inFASTA.close(); F.setLength(alignmentLength); @@ -148,18 +180,23 @@ int FilterSeqsCommand::execute() { else { F.setFilter(string(alignmentLength, '1')); } if(trump != '*' || isTrue(vertical) || soft != 0){ - while(!inFASTA.eof()){ //read through and create the filter... - Sequence seq(inFASTA); - if (seq.getName() != "") { - if(trump != '*'){ F.doTrump(seq); } - if(isTrue(vertical) || soft != 0){ F.getFreqs(seq); } - numSeqs++; - cout.flush(); + for (int i = 0; i < fastafileNames.size(); i++) { + ifstream in; + openInputFile(fastafileNames[i], in); + + while(!in.eof()){ //read through and create the filter... + Sequence seq(in); + if (seq.getName() != "") { + if(trump != '*'){ F.doTrump(seq); } + if(isTrue(vertical) || soft != 0){ F.getFreqs(seq); } + numSeqs++; + cout.flush(); + } } + in.close(); } } - inFASTA.close(); F.setNumSeqs(numSeqs); @@ -169,38 +206,43 @@ int FilterSeqsCommand::execute() { filter = F.getFilter(); ofstream outFilter; - string filterFile = outputDir + getRootName(getSimpleName(fastafile)) + "filter"; + + string filterFile = outputDir + filterFileName + ".filter"; openOutputFile(filterFile, outFilter); outFilter << filter << endl; outFilter.close(); + outputNames.push_back(filterFile); - ifstream inFasta2; - openInputFile(fastafile, inFasta2); - string filteredFasta = outputDir + getRootName(getSimpleName(fastafile)) + "filter.fasta"; - ofstream outFASTA; - openOutputFile(filteredFasta, outFASTA); - numSeqs = 0; - while(!inFasta2.eof()){ - Sequence seq(inFasta2); - if (seq.getName() != "") { - string align = seq.getAligned(); - string filterSeq = ""; - - for(int j=0;j' << seq.getName() << endl << filterSeq <<
endl; + numSeqs++; } - - outFASTA << '>' << seq.getName() << endl << filterSeq << endl; - numSeqs++; } + gobble(in); } - gobble(inFasta2); + outFASTA.close(); + in.close(); } - outFASTA.close(); - inFasta2.close(); - int filteredLength = 0; for(int i=0;imothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(filterFile); m->mothurOutEndLine(); - m->mothurOut(filteredFasta); m->mothurOutEndLine(); + for(int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; diff --git a/filterseqscommand.h b/filterseqscommand.h index 0946396..e8af5ec 100644 --- a/filterseqscommand.h +++ b/filterseqscommand.h @@ -23,7 +23,8 @@ public: void help(); private: - string vertical, filter, fastafile, hard, outputDir; + string vertical, filter, fasta, hard, outputDir, filterFileName; + vector<string> fastafileNames; int alignmentLength; char trump;