X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=trimseqscommand.cpp;h=49aca824ea46fb18c5845e4caa5bab0aca5db305;hb=6da6af5c7f43f9f4f602d224af447805fb01c46c;hp=2ad4f0607a24c23506109745553d608544a43d6c;hpb=4116449310d17a847470b84728cdefee5197e67e;p=mothur.git diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 2ad4f06..49aca82 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -14,7 +14,7 @@ vector TrimSeqsCommand::getValidParameters(){ try { - string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", + string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop","minlength", "maxlength", "qfile", "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "keepfirst", "removelast", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; @@ -34,7 +34,7 @@ TrimSeqsCommand::TrimSeqsCommand(){ abort = true; calledHelp = true; vector tempOutNames; outputTypes["fasta"] = tempOutNames; - outputTypes["qual"] = tempOutNames; + outputTypes["qfile"] = tempOutNames; outputTypes["group"] = tempOutNames; } catch(exception& e) { @@ -83,7 +83,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { else { //valid paramters for this command - string AlignArray[] = { "fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", + string AlignArray[] = { "fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "keepfirst", "removelast", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; @@ -104,7 +104,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { //initialize outputTypes vector tempOutNames; outputTypes["fasta"] = tempOutNames; - outputTypes["qual"] = tempOutNames; + outputTypes["qfile"] = tempOutNames; outputTypes["group"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter @@ -136,13 +136,6 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { if (path == "") { parameters["qfile"] = inputDir + it->second; } } - it = parameters.find("group"); - //user has given a template file - if(it != parameters.end()){ - path = m->hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["group"] = inputDir + it->second; } - } } @@ -170,10 +163,6 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { else if(temp == "not open"){ abort = true; } else { oligoFile = temp; } - temp = validParameter.validFile(parameters, "group", true); - if (temp == "not found"){ groupfile = ""; } - else if(temp == "not open"){ abort = true; } - else { groupfile = temp; } temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; } convert(temp, maxAmbig); @@ -236,13 +225,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } convert(temp, processors); - if ((oligoFile != "") && (groupfile != "")) { - m->mothurOut("You given both a oligos file and a groupfile, only one is allowed."); m->mothurOutEndLine(); abort = true; - } - - if(allFiles && (oligoFile == "") && (groupfile == "")){ - m->mothurOut("You selected allfiles, but didn't enter an oligos or group file. Ignoring the allfiles request."); m->mothurOutEndLine(); + if(allFiles && (oligoFile == "")){ + m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request."); m->mothurOutEndLine(); } if((qAverage != 0 && qThreshold != 0) && qFileName == ""){ m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine(); @@ -268,9 +253,8 @@ void TrimSeqsCommand::help(){ try { m->mothurOut("The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n"); m->mothurOut("The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n"); - m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, group, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n"); + m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n"); m->mothurOut("The fasta parameter is required.\n"); - m->mothurOut("The group parameter allows you to enter a group file for your fasta file.\n"); m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n"); m->mothurOut("The oligos parameter allows you to provide an oligos file.\n"); m->mothurOut("The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n"); @@ -333,15 +317,14 @@ int TrimSeqsCommand::execute(){ if (qFileName != "") { outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile); - outputTypes["qual"].push_back(trimQualFile); - outputTypes["qual"].push_back(scrapQualFile); + outputTypes["qfile"].push_back(trimQualFile); + outputTypes["qfile"].push_back(scrapQualFile); } string outputGroupFileName; - if(oligoFile != ""){ outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; - outputNames.push_back(outputGroupFileName); outputTypes["groups"].push_back(outputGroupFileName); + outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName); getOligos(fastaFileNames, qualFileNames); } @@ -363,33 +346,72 @@ int TrimSeqsCommand::execute(){ createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames); } #else - driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]); + driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]); #endif if (m->control_pressed) { return 0; } - if(allFiles){ + map uniqueFastaNames;// so we don't add the same groupfile multiple times + map::iterator it; + set namesToRemove; for(int i=0;iisBlank(fastaFileNames[i][j])){ - remove(fastaFileNames[i][j].c_str()); - - if(qFileName != ""){ + if (fastaFileNames[i][j] != "") { + if(m->isBlank(fastaFileNames[i][j])){ remove(fastaFileNames[i][j].c_str()); + namesToRemove.insert(fastaFileNames[i][j]); + + if(qFileName != ""){ + remove(qualFileNames[i][j].c_str()); + namesToRemove.insert(qualFileNames[i][j]); + } + }else{ + it = uniqueFastaNames.find(fastaFileNames[i][j]); + if (it == uniqueFastaNames.end()) { + uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i]; + } } - } } } + + //remove names for outputFileNames, just cleans up the output + vector outputNames2; + for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } } + outputNames = outputNames2; + + for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) { + ifstream in; + m->openInputFile(it->first, in); + + ofstream out; + string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups"; + outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); + m->openOutputFile(thisGroupName, out); + + while (!in.eof()){ + if (m->control_pressed) { break; } + + Sequence currSeq(in); m->gobble(in); + out << currSeq.getName() << '\t' << it->second << endl; + } + in.close(); + out.close(); + } } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + //output group counts + m->mothurOutEndLine(); + int total = 0; +// for (int i = 0; i < barcodeNameVector.size(); i++) { +// if ((barcodeNameVector[i] != "") && (groupCounts[i] != 0)) { total += groupCounts[i]; m->mothurOut("Group " + barcodeNameVector[i] + " contains " + toString(groupCounts[i]) + " sequences."); m->mothurOutEndLine(); } +// } +// if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); } - - if (m->control_pressed) { - for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } - return 0; - } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); @@ -426,14 +448,15 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string ofstream outGroupsFile; if (oligoFile != ""){ m->openOutputFile(groupFileName, outGroupsFile); } - if(allFiles){ for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file - ofstream temp; - m->openOutputFile(fastaFileNames[i][j], temp); temp.close(); - if(qFileName != ""){ - m->openOutputFile(qualFileNames[i][j], temp); temp.close(); + if (fastaFileNames[i][j] != "") { + ofstream temp; + m->openOutputFile(fastaFileNames[i][j], temp); temp.close(); + if(qFileName != ""){ + m->openOutputFile(qualFileNames[i][j], temp); temp.close(); + } } } } @@ -474,7 +497,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string QualityScores currQual; if(qFileName != ""){ - currQual = QualityScores(qFile, currSeq.getNumBases()); m->gobble(qFile); + currQual = QualityScores(qFile); m->gobble(qFile); } string origSeq = currSeq.getUnaligned(); @@ -557,6 +580,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if(barcodes.size() != 0){ outGroupsFile << currSeq.getName() << '\t' << barcodeNameVector[barcodeIndex] << endl; + groupCounts[barcodeIndex]++; } @@ -640,12 +664,14 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName for(int i=0;iopenOutputFile(tempFASTAFileNames[i][j], temp); temp.close(); - - if(qFileName != ""){ - tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp"; - m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close(); + if (tempFASTAFileNames[i][j] != "") { + tempFASTAFileNames[i][j] += toString(getpid()) + ".temp"; + m->openOutputFile(tempFASTAFileNames[i][j], temp); temp.close(); + + if(qFileName != ""){ + tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp"; + m->openOutputFile(tempPrimerQualFileNames[i][j], temp); temp.close(); + } } } } @@ -663,6 +689,15 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName lines[process], qLines[process]); + //pass groupCounts to parent + ofstream out; + string tempFile = filename + toString(getpid()) + ".num.temp"; + m->openOutputFile(tempFile, out); + for(int i = 0; i < groupCounts.size(); i++) { + out << groupCounts[i] << endl; + } + out.close(); + exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); @@ -678,11 +713,8 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName m->openOutputFile(trimQualFileName, temp); temp.close(); m->openOutputFile(scrapQualFileName, temp); temp.close(); - - driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]); - //force parent to wait until all the processes are done for (int i=0;iappendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]); - remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str()); - - if(qFileName != ""){ - m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]); - remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str()); + if (fastaFileNames[j][k] != "") { + m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]); + remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str()); + + if(qFileName != ""){ + m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]); + remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str()); + } } } } } + ifstream in; + string tempFile = filename + toString(processIDS[i]) + ".num.temp"; + m->openInputFile(tempFile, in); + int count = 0; + int tempNum; + while (!in.eof()) { + in >> tempNum; m->gobble(in); + groupCounts[count] += tempNum; + count++; + } + in.close(); remove(tempFile.c_str()); + } return exitCommand; @@ -910,6 +956,7 @@ void TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< if(qFileName != ""){ qualFileNames = fastaFileNames; } if(allFiles){ + set uniqueNames; //used to cleanup outputFileNames for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ @@ -934,15 +981,22 @@ void TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< ofstream temp; fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta"; - outputNames.push_back(fastaFileName); - outputTypes["fasta"].push_back(fastaFileName); + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(fastaFileName); + outputTypes["fasta"].push_back(fastaFileName); + uniqueNames.insert(fastaFileName); + } + fastaFileNames[itBar->second][itPrimer->second] = fastaFileName; m->openOutputFile(fastaFileName, temp); temp.close(); if(qFileName != ""){ qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual"; - outputNames.push_back(qualFileName); - outputTypes["qual"].push_back(qualFileName); + if (uniqueNames.count(fastaFileName) == 0) { + outputNames.push_back(qualFileName); + outputTypes["qfile"].push_back(qualFileName); + } + qualFileNames[itBar->second][itPrimer->second] = qualFileName; m->openOutputFile(qualFileName, temp); temp.close(); } @@ -951,6 +1005,7 @@ void TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< } numFPrimers = primers.size(); numRPrimers = revPrimer.size(); + groupCounts.resize(barcodeNameVector.size(), 0); } catch(exception& e) {