From: westcott Date: Fri, 4 Mar 2011 15:45:49 +0000 (+0000) Subject: worked on trim.seqs - added in the groupfiles for allfiles=t, cleaned up the outputNa... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=c3396974063d6efc5e5850ddf4ed8ab65cc94bb9 worked on trim.seqs - added in the groupfiles for allfiles=t, cleaned up the outputNames, added sequence group counts --- diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index d1b9a5c..abfe15a 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -322,7 +322,6 @@ int TrimSeqsCommand::execute(){ } string outputGroupFileName; - if(oligoFile != ""){ outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); @@ -352,28 +351,68 @@ int TrimSeqsCommand::execute(){ if (m->control_pressed) { return 0; } - if(allFiles){ + //clear out all old group files + map uniqueFastaNames;// so we don't add the same groupfile multiple times + map::iterator it; + set namesToRemove; for(int i=0;iisBlank(fastaFileNames[i][j])){ - remove(fastaFileNames[i][j].c_str()); - - if(qFileName != ""){ + if (fastaFileNames[i][j] != "") { + if(m->isBlank(fastaFileNames[i][j])){ remove(fastaFileNames[i][j].c_str()); + namesToRemove.insert(fastaFileNames[i][j]); + + if(qFileName != ""){ + remove(qualFileNames[i][j].c_str()); + namesToRemove.insert(qualFileNames[i][j]); + } + }else{ + it = uniqueFastaNames.find(fastaFileNames[i][j]); + if (it == uniqueFastaNames.end()) { + uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i]; + } } - } } } + + //remove names for outputFileNames, just cleans up the output + vector outputNames2; + for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } } + outputNames = outputNames2; + + for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) { + ifstream in; + m->openInputFile(it->first, in); + + ofstream out; + string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups"; + outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); + m->openOutputFile(thisGroupName, out); + + while (!in.eof()){ + if (m->control_pressed) { break; } + + Sequence currSeq(in); m->gobble(in); + out << currSeq.getName() << '\t' << it->second << endl; + } + in.close(); + out.close(); + } } - - - if (m->control_pressed) { - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } - return 0; + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + //output group counts + m->mothurOutEndLine(); + int total = 0; + for (int i = 0; i < barcodeNameVector.size(); i++) { + if ((barcodeNameVector[i] != "") && (groupCounts[i] != 0)) { total += groupCounts[i]; m->mothurOut("Group " + barcodeNameVector[i] + " contains " + toString(groupCounts[i]) + " sequences."); m->mothurOutEndLine(); } } + if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); @@ -410,14 +449,15 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string ofstream outGroupsFile; if (oligoFile != ""){ m->openOutputFile(groupFileName, outGroupsFile); } - if(allFiles){ for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file - ofstream temp; - m->openOutputFile(fastaFileNames[i][j], temp); temp.close(); - if(qFileName != ""){ - m->openOutputFile(qualFileNames[i][j], temp); temp.close(); + if (fastaFileNames[i][j] != "") { + ofstream temp; + m->openOutputFile(fastaFileNames[i][j], temp); temp.close(); + if(qFileName != ""){ + m->openOutputFile(qualFileNames[i][j], temp); temp.close(); + } } } } @@ -541,6 +581,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if(barcodes.size() != 0){ outGroupsFile << currSeq.getName() << '\t' << barcodeNameVector[barcodeIndex] << endl; + groupCounts[barcodeIndex]++; } @@ -624,12 +665,14 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName for(int i=0;iopenOutputFile(tempFASTAFileNames[i][j], temp); temp.close(); - - if(qFileName != ""){ - tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp"; - m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close(); + if (tempFASTAFileNames[i][j] != "") { + tempFASTAFileNames[i][j] += toString(getpid()) + ".temp"; + m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close(); + + if(qFileName != ""){ + tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp"; + m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close(); + } } } } @@ -647,6 +690,15 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName lines[process], qLines[process]); + //pass groupCounts to parent + ofstream out; + string tempFile = filename + toString(getpid()) + ".num.temp"; + m->openOutputFile(tempFile, out); + for(int i = 0; i < groupCounts.size(); i++) { + out << groupCounts[i] << endl; + } + out.close(); + exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); @@ -662,11 +714,8 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName m->openOutputFile(trimQualFileName, temp); temp.close(); m->openOutputFile(scrapQualFileName, temp); temp.close(); - - driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]); - //force parent to wait until all the processes are done for (int i=0;iappendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]); - remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str()); - - if(qFileName != ""){ - m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]); - remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str()); + if (fastaFileNames[j][k] != "") { + m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]); + remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str()); + + if(qFileName != ""){ + m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]); + remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str()); + } } } } } + ifstream in; + string tempFile = filename + toString(processIDS[i]) + ".num.temp"; + m->openInputFile(tempFile, in); + int count = 0; + int tempNum; + while (!in.eof()) { + in >> tempNum; m->gobble(in); + groupCounts[count] += tempNum; + count++; + } + in.close(); remove(tempFile.c_str()); + } return exitCommand; @@ -894,6 +957,7 @@ void TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< if(qFileName != ""){ qualFileNames = fastaFileNames; } if(allFiles){ + set uniqueNames; //used to cleanup outputFileNames for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ @@ -918,15 +982,22 @@ void TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< ofstream temp; fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta"; - outputNames.push_back(fastaFileName); - outputTypes["fasta"].push_back(fastaFileName); + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(fastaFileName); + outputTypes["fasta"].push_back(fastaFileName); + uniqueNames.insert(fastaFileName); + } + fastaFileNames[itBar->second][itPrimer->second] = fastaFileName; m->openOutputFile(fastaFileName, temp); temp.close(); if(qFileName != ""){ qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual"; - outputNames.push_back(qualFileName); - outputTypes["qfile"].push_back(qualFileName); + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(qualFileName); + outputTypes["qfile"].push_back(qualFileName); + } + qualFileNames[itBar->second][itPrimer->second] = qualFileName; m->openOutputFile(qualFileName, temp); temp.close(); } @@ -935,6 +1006,7 @@ void TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< } numFPrimers = primers.size(); numRPrimers = revPrimer.size(); + groupCounts.resize(barcodeNameVector.size(), 0); } catch(exception& e) {