X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimseqscommand.cpp;h=5e0541f9cb954575d39d277cdf4a50d6931c21c2;hb=def6801aad4aadbbaa7cc615b11554e47dad5ce0;hp=a09e40246ef1c71594b7023410f35c7fbb52d22f;hpb=d9b668f68b99f92ecdc71dd8cd363cb4e27107f9;p=mothur.git diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index a09e402..5e0541f 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -337,7 +337,7 @@ int TrimSeqsCommand::execute(){ outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile); getOligos(fastaFileNames, qualFileNames); } - + vector fastaFilePos; vector qFilePos; @@ -351,90 +351,20 @@ int TrimSeqsCommand::execute(){ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1){ - driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]); - - for (int j = 0; j < fastaFileNames.size(); j++) { - rename((fastaFileNames[j] + toString(getpid()) + ".temp").c_str(), fastaFileNames[j].c_str()); - } - if(qFileName != ""){ - for (int j = 0; j < qualFileNames.size(); j++) { - rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str()); - } - } - }else{ createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames); - - rename((trimSeqFile + toString(processIDS[0]) + ".temp").c_str(), trimSeqFile.c_str()); - rename((scrapSeqFile + toString(processIDS[0]) + ".temp").c_str(), scrapSeqFile.c_str()); - rename((groupFile + toString(processIDS[0]) + ".temp").c_str(), groupFile.c_str()); - - if(qFileName != ""){ - rename((trimQualFile + toString(processIDS[0]) + ".temp").c_str(), trimQualFile.c_str()); - rename((scrapQualFile + toString(processIDS[0]) + ".temp").c_str(), scrapQualFile.c_str()); - } - - - for (int j = 0; j < fastaFileNames.size(); j++) { - rename((fastaFileNames[j] + toString(processIDS[0]) + ".temp").c_str(), fastaFileNames[j].c_str()); - } - if(qFileName != ""){ - for (int j = 0; j < qualFileNames.size(); j++) { - rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str()); - } - } - - //append files - for(int i=1;iappendFiles((trimSeqFile + toString(processIDS[i]) + ".temp"), trimSeqFile); - remove((trimSeqFile + toString(processIDS[i]) + ".temp").c_str()); - m->appendFiles((scrapSeqFile + toString(processIDS[i]) + ".temp"), scrapSeqFile); - remove((scrapSeqFile + toString(processIDS[i]) + ".temp").c_str()); - - m->appendFiles((trimQualFile + toString(processIDS[i]) + ".temp"), trimQualFile); - remove((trimQualFile + toString(processIDS[i]) + ".temp").c_str()); - m->appendFiles((scrapQualFile + toString(processIDS[i]) + ".temp"), scrapQualFile); - remove((scrapQualFile + toString(processIDS[i]) + ".temp").c_str()); - - m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile); - remove((groupFile + toString(processIDS[i]) + ".temp").c_str()); - for (int j = 0; j < fastaFileNames.size(); j++) { - m->appendFiles((fastaFileNames[j] + toString(processIDS[i]) + ".temp"), fastaFileNames[j]); - remove((fastaFileNames[j] + toString(processIDS[i]) + ".temp").c_str()); - } - - if(qFileName != ""){ - for (int j = 0; j < qualFileNames.size(); j++) { - m->appendFiles((qualFileNames[j] + toString(processIDS[i]) + ".temp"), qualFileNames[j]); - remove((qualFileNames[j] + toString(processIDS[i]) + ".temp").c_str()); - } - } - - - } - } - - if (m->control_pressed) { return 0; } + } #else driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]); - - for (int j = 0; j < fastaFileNames.size(); j++) { - rename((fastaFileNames[j] + toString(j) + ".temp").c_str(), fastaFileNames[j].c_str()); - } - if(qFileName != ""){ - for (int j = 0; j < qualFileNames.size(); j++) { - rename((qualFileNames[j] + toString(j) + ".temp").c_str(), qualFileNames[j].c_str()); - } - } - - if (m->control_pressed) { return 0; } #endif - + + if (m->control_pressed) { return 0; } for(int i=0;iisBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str()); } - else if (filesToRemove.count(fastaFileNames[i]) > 0) { remove(fastaFileNames[i].c_str()); } + + if (m->isBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str()); } + else if (filesToRemove.count(fastaFileNames[i]) > 0) { remove(fastaFileNames[i].c_str()); } else { ifstream inFASTA; string seqName; @@ -451,7 +381,7 @@ int TrimSeqsCommand::execute(){ if(itCombo->second == i){ thisGroup = itCombo->first; combos.erase(itCombo); break; } } }else{ thisGroup = groupVector[i]; } - + while(!inFASTA.eof()){ if(inFASTA.get() == '>'){ inFASTA >> seqName; @@ -466,8 +396,8 @@ int TrimSeqsCommand::execute(){ if(qFileName != ""){ for(int i=0;iisBlank(qualFileNames[i])) { remove(qualFileNames[i].c_str()); } - else if (filesToRemove.count(qualFileNames[i]) > 0) { remove(qualFileNames[i].c_str()); } + if (m->isBlank(qualFileNames[i])) { remove(qualFileNames[i].c_str()); } + else if (filesToRemove.count(qualFileNames[i]) > 0) { remove(qualFileNames[i].c_str()); } else { ifstream inQual; string seqName; @@ -528,43 +458,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string } ofstream outGroups; - //vector fastaFileNames; - //vector qualFileNames; if (oligoFile != "") { m->openOutputFile(groupFile, outGroups); - for (int i = 0; i < fastaNames.size(); i++) { - - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp"); - //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); - //clear old file if it exists - ofstream temp; - m->openOutputFile(fastaNames[i], temp); - temp.close(); - if(qFileName != ""){ - qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp"); - //qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); - //clear old file if it exists - ofstream temp2; - m->openOutputFile(qualNames[i], temp2); - temp2.close(); - } - #else - //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate)); - fastaNames[i] = (fastaNames[i] + toString(i) + ".temp"); - ofstream temp; - m->openOutputFile(fastaNames[i], temp); - temp.close(); - if(qFileName != ""){ - //qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate)); - qualNames[i] = (qualNames[i] + toString(i) + ".temp"); - ofstream temp2; - m->openOutputFile(qualNames[i], temp2); - temp2.close(); - } - #endif - } } ifstream inFASTA; @@ -574,6 +470,19 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string ifstream qFile; if(qFileName != "") { m->openInputFile(qFileName, qFile); qFile.seekg(qline->start); } + + for (int i = 0; i < fastaNames.size(); i++) { //clears old file + ofstream temp; + m->openOutputFile(fastaNames[i], temp); + temp.close(); + } + for (int i = 0; i < qualNames.size(); i++) { //clears old file + ofstream temp; + m->openOutputFile(qualNames[i], temp); + temp.close(); + } + + bool done = false; int count = 0; @@ -582,12 +491,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if (m->control_pressed) { inFASTA.close(); outFASTA.close(); scrapFASTA.close(); if (oligoFile != "") { outGroups.close(); } - - //for(int i=0;iclose(); delete fastaFileNames[i]; } if(qFileName != ""){ qFile.close(); - //for(int i=0;iclose(); delete qualFileNames[i]; } } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } @@ -747,18 +653,6 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if (oligoFile != "") { outGroups.close(); } if(qFileName != "") { qFile.close(); scrapQual.close(); outQual.close(); } - //for(int i=0;iclose(); - // delete fastaFileNames[i]; - //} - - //if(qFileName != ""){ - //for(int i=0;iclose(); - //delete qualFileNames[i]; - //} - //} - return count; } catch(exception& e) { @@ -772,7 +666,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector fastaNames, vector qualNames) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - int process = 0; + int process = 1; int exitCommand = 1; processIDS.clear(); @@ -784,6 +678,21 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ + for (int i = 0; i < fastaNames.size(); i++) { + fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp"); + //clear old file if it exists + ofstream temp; + m->openOutputFile(fastaNames[i], temp); + temp.close(); + if(qFileName != ""){ + qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp"); + //clear old file if it exists + ofstream temp2; + m->openOutputFile(qualNames[i], temp2); + temp2.close(); + } + } + driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]); exit(0); }else { @@ -793,12 +702,69 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName } } + //parent do my part + for (int i = 0; i < fastaNames.size(); i++) { + //clear old file if it exists + ofstream temp; + m->openOutputFile(fastaNames[i], temp); + temp.close(); + if(qFileName != ""){ + //clear old file if it exists + ofstream temp2; + m->openOutputFile(qualNames[i], temp2); + temp2.close(); + } + } + + driverCreateTrim(filename, qFileName, trimFile, scrapFile, trimQFile, scrapQFile, groupFile, fastaNames, qualNames, lines[0], qLines[0]); + + //force parent to wait until all the processes are done - for (int i=0;imothurOut("Appending files from process " + processIDS[i]); m->mothurOutEndLine(); + + m->appendFiles((trimFile + toString(processIDS[i]) + ".temp"), trimFile); + remove((trimFile + toString(processIDS[i]) + ".temp").c_str()); + m->appendFiles((scrapFile + toString(processIDS[i]) + ".temp"), scrapFile); + remove((scrapFile + toString(processIDS[i]) + ".temp").c_str()); + + m->mothurOut("Done with fasta files"); m->mothurOutEndLine(); + + if(qFileName != ""){ + m->appendFiles((trimQFile + toString(processIDS[i]) + ".temp"), trimQFile); + remove((trimQFile + toString(processIDS[i]) + ".temp").c_str()); + m->appendFiles((scrapQFile + toString(processIDS[i]) + ".temp"), scrapQFile); + remove((scrapQFile + toString(processIDS[i]) + ".temp").c_str()); + + m->mothurOut("Done with quality files"); m->mothurOutEndLine(); + } + + m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile); + remove((groupFile + toString(processIDS[i]) + ".temp").c_str()); + + m->mothurOut("Done with group file"); m->mothurOutEndLine(); + + for (int j = 0; j < fastaNames.size(); j++) { + m->appendFiles((fastaNames[j] + toString(processIDS[i]) + ".temp"), fastaNames[j]); + remove((fastaNames[j] + toString(processIDS[i]) + ".temp").c_str()); + } + + if(qFileName != ""){ + for (int j = 0; j < qualNames.size(); j++) { + m->appendFiles((qualNames[j] + toString(processIDS[i]) + ".temp"), qualNames[j]); + remove((qualNames[j] + toString(processIDS[i]) + ".temp").c_str()); + } + } + + if (allFiles) { m->mothurOut("Done with allfiles"); m->mothurOutEndLine(); } + } + return exitCommand; #endif } @@ -905,7 +871,7 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec, vector& out //int indexPrimer = 0; while(!inOligos.eof()){ - inOligos >> type; + inOligos >> type; m->gobble(inOligos); if(type[0] == '#'){ while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there @@ -936,29 +902,29 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec, vector& out map::iterator itPrime = primers.find(oligo); if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - primers[oligo]=index; index++; - groupVector.push_back(group); + primers[oligo]=index; index++; + groupVector.push_back(group); - if(allFiles){ - outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); - if(qFileName != ""){ - outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); - } - if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct - filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + if(allFiles){ + outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); if(qFileName != ""){ - filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + } + if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct + filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + if(qFileName != ""){ + filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + } + }else { + outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + if(qFileName != ""){ + outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + } } - }else { - outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); - outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); - if(qFileName != ""){ - outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); - outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); - } } - } - + } else if(type == "REVERSE"){ Sequence oligoRC("reverse", oligo); @@ -972,19 +938,20 @@ void TrimSeqsCommand::getOligos(vector& outFASTAVec, vector& out map::iterator itBar = barcodes.find(oligo); if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); } - barcodes[oligo]=index; index++; - groupVector.push_back(group); + barcodes[oligo]=index; index++; + groupVector.push_back(group); + + if(allFiles){ + outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); + if(qFileName != ""){ + outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); + } + } - if(allFiles){ - outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); - outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); - outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta")); - if(qFileName != ""){ - outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); - outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); - outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual")); - } - } }else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); } } m->gobble(inOligos);