X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimseqscommand.cpp;h=a09e40246ef1c71594b7023410f35c7fbb52d22f;hb=d9b668f68b99f92ecdc71dd8cd363cb4e27107f9;hp=320bc41f4816eccb8f4b0fb19cb92864e91055d2;hpb=a76d81690125ca57d7f602ac93abad75cf9796c2;p=mothur.git diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 320bc41..a09e402 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -10,6 +10,57 @@ #include "trimseqscommand.h" #include "needlemanoverlap.hpp" +//********************************************************************************************************************** +vector TrimSeqsCommand::getValidParameters(){ + try { + string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", + "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "TrimSeqsCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +TrimSeqsCommand::TrimSeqsCommand(){ + try { + abort = true; + //initialize outputTypes + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["qual"] = tempOutNames; + outputTypes["group"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector TrimSeqsCommand::getRequiredParameters(){ + try { + string Array[] = {"fasta"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "TrimSeqsCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector TrimSeqsCommand::getRequiredFiles(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "TrimSeqsCommand", "getRequiredFiles"); + exit(1); + } +} //*************************************************************************************************************** TrimSeqsCommand::TrimSeqsCommand(string option) { @@ -23,7 +74,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { else { //valid paramters for this command - string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", + string AlignArray[] = {"fasta", "flip", "group","oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); @@ -39,6 +90,12 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + //initialize outputTypes + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["qual"] = tempOutNames; + outputTypes["group"] = tempOutNames; + //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } @@ -67,12 +124,20 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["qfile"] = inputDir + it->second; } } + + it = parameters.find("group"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["group"] = inputDir + it->second; } + } } //check for required parameters fastaFile = validParameter.validFile(parameters, "fasta", true); - if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the screen.seqs command."); m->mothurOutEndLine(); abort = true; } + if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; } else if (fastaFile == "not open") { abort = true; } //if the user changes the output directory command factory will send this info to us in the output parameter @@ -94,6 +159,11 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { else if(temp == "not open"){ abort = true; } else { oligoFile = temp; } + temp = validParameter.validFile(parameters, "group", true); + if (temp == "not found"){ groupfile = ""; } + else if(temp == "not open"){ abort = true; } + else { groupfile = temp; } + temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; } convert(temp, maxAmbig); @@ -149,8 +219,13 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } convert(temp, processors); - if(allFiles && oligoFile == ""){ - m->mothurOut("You selected allfiles, but didn't enter an oligos file. Ignoring the allfiles request."); m->mothurOutEndLine(); + if ((oligoFile != "") && (groupfile != "")) { + m->mothurOut("You given both a oligos file and a groupfile, only one is allowed."); m->mothurOutEndLine(); abort = true; + } + + + if(allFiles && (oligoFile == "") && (groupfile == "")){ + m->mothurOut("You selected allfiles, but didn't enter an oligos or group file. Ignoring the allfiles request."); m->mothurOutEndLine(); } if((qAverage != 0 && qThreshold != 0) && qFileName == ""){ m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine(); @@ -174,8 +249,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { void TrimSeqsCommand::help(){ try { m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n"); - m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n"); + m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, group, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n"); m->mothurOut("The fasta parameter is required.\n"); + m->mothurOut("The group parameter allows you to enter a group file for your fasta file.\n"); m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n"); m->mothurOut("The oligos parameter .... The default is "".\n"); m->mothurOut("The maxambig parameter .... The default is -1.\n"); @@ -218,20 +294,47 @@ int TrimSeqsCommand::execute(){ numFPrimers = 0; //this needs to be initialized numRPrimers = 0; + vector fastaFileNames; + vector qualFileNames; string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta"; - outputNames.push_back(trimSeqFile); + outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile); string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta"; - outputNames.push_back(scrapSeqFile); + outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile); string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual"; string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual"; - if (qFileName != "") { outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile); } - string groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; + if (qFileName != "") { outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile); outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); } + string groupFile = ""; + if (groupfile == "") { groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; } + else{ + groupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "trim.groups"; + outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile); + groupMap = new GroupMap(groupfile); + groupMap->readMap(); + + if(allFiles){ + for (int i = 0; i < groupMap->namesOfGroups.size(); i++) { + groupToIndex[groupMap->namesOfGroups[i]] = i; + groupVector.push_back(groupMap->namesOfGroups[i]); + fastaFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupMap->namesOfGroups[i] + ".fasta")); + + //we append later, so we want to clear file + ofstream outRemove; + m->openOutputFile(fastaFileNames[i], outRemove); + outRemove.close(); + if(qFileName != ""){ + qualFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupMap->namesOfGroups[i] + ".qual")); + ofstream outRemove2; + m->openOutputFile(qualFileNames[i], outRemove2); + outRemove2.close(); + } + } + } + comboStarts = fastaFileNames.size()-1; + } - vector fastaFileNames; - vector qualFileNames; if(oligoFile != ""){ - outputNames.push_back(groupFile); + outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile); getOligos(fastaFileNames, qualFileNames); } @@ -327,7 +430,7 @@ int TrimSeqsCommand::execute(){ if (m->control_pressed) { return 0; } #endif - + for(int i=0;iisBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str()); } @@ -339,7 +442,7 @@ int TrimSeqsCommand::execute(){ ofstream outGroups; string outGroupFilename = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "groups"; m->openOutputFile(outGroupFilename, outGroups); - outputNames.push_back(outGroupFilename); + outputNames.push_back(outGroupFilename); outputTypes["group"].push_back(outGroupFilename); string thisGroup = ""; if (i > comboStarts) { @@ -425,22 +528,40 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string } ofstream outGroups; - vector fastaFileNames; - vector qualFileNames; + //vector fastaFileNames; + //vector qualFileNames; if (oligoFile != "") { m->openOutputFile(groupFile, outGroups); for (int i = 0; i < fastaNames.size(); i++) { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); + fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp"); + //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); + //clear old file if it exists + ofstream temp; + m->openOutputFile(fastaNames[i], temp); + temp.close(); if(qFileName != ""){ - qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); + qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp"); + //qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); + //clear old file if it exists + ofstream temp2; + m->openOutputFile(qualNames[i], temp2); + temp2.close(); } #else - fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate)); + //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate)); + fastaNames[i] = (fastaNames[i] + toString(i) + ".temp"); + ofstream temp; + m->openOutputFile(fastaNames[i], temp); + temp.close(); if(qFileName != ""){ - qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate)); + //qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate)); + qualNames[i] = (qualNames[i] + toString(i) + ".temp"); + ofstream temp2; + m->openOutputFile(qualNames[i], temp2); + temp2.close(); } #endif } @@ -462,11 +583,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string inFASTA.close(); outFASTA.close(); scrapFASTA.close(); if (oligoFile != "") { outGroups.close(); } - for(int i=0;iclose(); delete fastaFileNames[i]; } + //for(int i=0;iclose(); delete fastaFileNames[i]; } if(qFileName != ""){ qFile.close(); - for(int i=0;iclose(); delete qualFileNames[i]; } + //for(int i=0;iclose(); delete qualFileNames[i]; } } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } @@ -553,10 +674,44 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string } outGroups << currSeq.getName() << '\t' << thisGroup << endl; if(allFiles){ - currSeq.printSequence(*fastaFileNames[indexToFastaFile]); + ofstream outTemp; + m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp); + //currSeq.printSequence(*fastaFileNames[indexToFastaFile]); + currSeq.printSequence(outTemp); + outTemp.close(); if(qFileName != ""){ - currQual.printQScores(*qualFileNames[indexToFastaFile]); + //currQual.printQScores(*qualFileNames[indexToFastaFile]); + ofstream outTemp2; + m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2); + currQual.printQScores(outTemp2); + outTemp2.close(); + } + } + } + + if (groupfile != "") { + string thisGroup = groupMap->getGroup(currSeq.getName()); + + if (thisGroup != "not found") { + outGroups << currSeq.getName() << '\t' << thisGroup << endl; + if (allFiles) { + ofstream outTemp; + m->openOutputFileAppend(fastaNames[groupToIndex[thisGroup]], outTemp); + currSeq.printSequence(outTemp); + outTemp.close(); + if(qFileName != ""){ + ofstream outTemp2; + m->openOutputFileAppend(qualNames[groupToIndex[thisGroup]], outTemp2); + currQual.printQScores(outTemp2); + outTemp2.close(); + } + } + }else{ + m->mothurOut(currSeq.getName() + " is not in your groupfile, adding to group XXX."); m->mothurOutEndLine(); + outGroups << currSeq.getName() << '\t' << "XXX" << endl; + if (allFiles) { + m->mothurOut("[ERROR]: " + currSeq.getName() + " will not be added to any .group.fasta or .group.qual file."); m->mothurOutEndLine(); } } } @@ -571,9 +726,13 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string count++; } - unsigned long int pos = inFASTA.tellg(); - if ((pos == -1) || (pos >= line->end)) { break; } - + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + unsigned long int pos = inFASTA.tellg(); + if ((pos == -1) || (pos >= line->end)) { break; } + #else + if (inFASTA.eof()) { break; } + #endif + //report progress if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } @@ -588,17 +747,17 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if (oligoFile != "") { outGroups.close(); } if(qFileName != "") { qFile.close(); scrapQual.close(); outQual.close(); } - for(int i=0;iclose(); - delete fastaFileNames[i]; - } + //for(int i=0;iclose(); + // delete fastaFileNames[i]; + //} - if(qFileName != ""){ - for(int i=0;iclose(); - delete qualFileNames[i]; - } - } + //if(qFileName != ""){ + //for(int i=0;iclose(); + //delete qualFileNames[i]; + //} + //} return count; } @@ -627,7 +786,11 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName }else if (pid == 0){ driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]); exit(0); - }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } } //force parent to wait until all the processes are done @@ -671,7 +834,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename, vectoropenInputFile(qfilename, inQual); - + string input; while(!inQual.eof()){ input = m->getline(inQual); @@ -697,6 +860,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename, vector::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) { m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine(); @@ -787,8 +951,10 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec, vector& out } }else { outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); if(qFileName != ""){ outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); } } } @@ -810,11 +976,13 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec, vector& out groupVector.push_back(group); if(allFiles){ + outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); if(qFileName != ""){ outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); } } }else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); } @@ -831,12 +999,14 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec, vector& out for (map::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) { if (groupVector[itPrime->second] != "") { //there is a group for this primer outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta")); + outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta")); outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta")); combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1; if(qFileName != ""){ outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual")); outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual")); + outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual")); } } }