From 2a29fceeeb8754c3fd97ba830d2fbed5c4349ee8 Mon Sep 17 00:00:00 2001 From: SarahsWork Date: Tue, 5 Mar 2013 14:13:00 -0500 Subject: [PATCH] working on chimera.uchime change for dereplicate=t bug. added shared file to get.shareseqs command and changed groups parameters to shared groups and unique groups. fixed trimming bug with trim.flows. working on trim.seqs for paired barcodes. --- chimerauchimecommand.cpp | 176 +++++++++++-- chimerauchimecommand.h | 41 ++- getsharedotucommand.cpp | 545 ++++++++++++++++++++++++++++----------- getsharedotucommand.h | 6 +- seqerrorcommand.cpp | 7 + trimflowscommand.cpp | 2 +- trimflowscommand.h | 2 +- trimseqscommand.cpp | 13 +- trimseqscommand.h | 11 +- 9 files changed, 602 insertions(+), 201 deletions(-) diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp index 9a25582..3ed6170 100644 --- a/chimerauchimecommand.cpp +++ b/chimerauchimecommand.cpp @@ -65,7 +65,7 @@ string ChimeraUchimeCommand::getHelpString(){ helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n"; - helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n"; + helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n"; helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n"; helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n"; @@ -110,7 +110,8 @@ string ChimeraUchimeCommand::getOutputPattern(string type) { if (type == "chimera") { pattern = "[filename],uchime.chimeras"; } else if (type == "accnos") { pattern = "[filename],uchime.accnos"; } - else if (type == "alns") { pattern = "[filename],uchime.alns"; } + else if (type == "alns") { pattern = "[filename],uchime.alns"; } + else if (type == "count") { pattern = "[filename],uchime.pick.count_table"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } return pattern; @@ -129,6 +130,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(){ outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["alns"] = tempOutNames; + outputTypes["count"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand"); @@ -163,6 +165,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["alns"] = tempOutNames; + outputTypes["count"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -646,6 +649,7 @@ int ChimeraUchimeCommand::execute(){ string accnosFileName = getOutputFileName("accnos", variables); string alnsFileName = getOutputFileName("alns", variables); string newFasta = m->getRootName(fastaFileNames[s]) + "temp"; + string newCountFile = ""; //you provided a groupfile string groupFile = ""; @@ -654,6 +658,8 @@ int ChimeraUchimeCommand::execute(){ else if (hasCount) { CountTable ct; if (ct.testGroups(nameFileNames[s])) { hasGroup = true; } + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s])); + newCountFile = getOutputFileName("count", variables); } if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups @@ -719,19 +725,61 @@ int ChimeraUchimeCommand::execute(){ if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); } int totalSeqs = 0; - if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); } - else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]); } + if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups); + + //read my own + if (hasCount && !dups) { + CountTable newCount; newCount.readTable(nameFile); + + if (!m->isBlank(newCountFile)) { + ifstream in2; + m->openInputFile(newCountFile, in2); + + string name, group; + while (!in2.eof()) { + in2 >> name >> group; m->gobble(in2); + newCount.setAbund(name, group, 0); + } + in2.close(); + } + m->mothurRemove(newCountFile); + newCount.printTable(newCountFile); + } + + }else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]); } if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } - if (hasCount) { delete cparser; } - else { delete sparser; } + if (!dups) { int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine(); m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); - } + }else { + /*if (hasCount) { //removed empty seqs + ofstream out2; + m->openOutputFile(accnosFileName, out2); + + CountTable c; c.readTable(newCountFile); + vector nseqs = c.getNamesOfSeqs(); + vector ngroups = c.getNamesOfGroups(); + for (int l = 0; l < nseqs.size(); l++) { + if (c.getNumSeqs(nseqs[l]) == 0) { + c.remove(nseqs[l]); + out2 << nseqs[l] << endl; + } + } + for (int l = 0; l < ngroups.size(); l++) { + if (c.getGroupCount(ngroups[l]) == 0) { c.removeGroup(ngroups[l]); } + } + out2.close(); + c.printTable(newCountFile); + }*/ + } + + if (hasCount) { delete cparser; } + else { delete sparser; } if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } @@ -1128,20 +1176,23 @@ string ChimeraUchimeCommand::getNamesFile(string& inputFile){ } } //********************************************************************************************************************** -int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector groups){ +int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector groups){ try { int totalSeqs = 0; int numChimeras = 0; - + + ofstream outCountList; + if (hasCount && dups) { m->openOutputFile(countlist, outCountList); } + for (int i = start; i < end; i++) { - int start = time(NULL); if (m->control_pressed) { return 0; } + int start = time(NULL); if (m->control_pressed) { outCountList.close(); m->mothurRemove(countlist); return 0; } int error; if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } } else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } } - int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras); + int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras); totalSeqs += numSeqs; if (m->control_pressed) { return 0; } @@ -1150,14 +1201,52 @@ int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, stri if (!m->debug) { m->mothurRemove(filename); } else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); } + //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table + //This table will zero out group counts for seqs determined to be chimeric by that group. + if (dups) { + if (!m->isBlank(accnos+groups[i])) { + ifstream in; + m->openInputFile(accnos+groups[i], in); + string name; + if (hasCount) { + while (!in.eof()) { + in >> name; m->gobble(in); + outCountList << name << '\t' << groups[i] << endl; + } + in.close(); + }else { + map thisnamemap = sparser->getNameMap(groups[i]); + map::iterator itN; + ofstream out; + m->openOutputFile(accnos+groups[i]+".temp", out); + while (!in.eof()) { + in >> name; m->gobble(in); + itN = thisnamemap.find(name); + if (itN != thisnamemap.end()) { + vector tempNames; m->splitAtComma(itN->second, tempNames); + for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; } + + }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; } + } + out.close(); + in.close(); + m->renameFile(accnos+groups[i]+".temp", accnos+groups[i]); + } + + } + } + //append files m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i])); m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i])); if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); } m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine(); - } - return totalSeqs; + } + + if (hasCount && dups) { outCountList.close(); } + + return totalSeqs; } catch(exception& e) { @@ -1641,8 +1730,8 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename // Allocate memory for thread data. string extension = toString(i) + ".temp"; - uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i); - tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount); + uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, "", dummy, m, 0, 0, i); + tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount); tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand); pDataArray.push_back(tempUchime); @@ -1694,12 +1783,15 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename } /**************************************************************************************************/ -int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector groups, string nameFile, string groupFile, string fastaFile) { +int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector groups, string nameFile, string groupFile, string fastaFile) { try { processIDS.clear(); int process = 1; int num = 0; + + CountTable newCount; + if (hasCount && dups) { newCount.readTable(nameFile); } //sanity check if (groups.size() < processors) { processors = groups.size(); } @@ -1724,7 +1816,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups); + num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", accnos + ".byCount." + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups); //pass numSeqs to parent ofstream out; @@ -1742,22 +1834,22 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen } //do my part - num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups); + num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups); //force parent to wait until all the processes are done for (int i=0;iopenInputFile(tempFile, in); if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; } in.close(); m->mothurRemove(tempFile); - } - + } + #else ////////////////////////////////////////////////////////////////////////////////////////////////////// //Windows version shared memory, so be careful when passing variables through the uchimeData struct. @@ -1773,8 +1865,8 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen // Allocate memory for thread data. string extension = toString(i) + ".temp"; - uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i); - tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount); + uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, accnos+".byCount."+extension, groups, m, lines[i].start, lines[i].end, i); + tempUchime->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount); tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand); pDataArray.push_back(tempUchime); @@ -1787,7 +1879,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen //using the main process as a worker saves time and memory - num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups); + num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups); //Wait until all threads have terminated. WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); @@ -1798,8 +1890,25 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen CloseHandle(hThreadArray[i]); delete pDataArray[i]; } + + #endif + //read my own + if (hasCount && dups) { + if (!m->isBlank(accnos + ".byCount")) { + ifstream in2; + m->openInputFile(accnos + ".byCount", in2); + + string name, group; + while (!in2.eof()) { + in2 >> name >> group; m->gobble(in2); + newCount.setAbund(name, group, 0); + } + in2.close(); + } + m->mothurRemove(accnos + ".byCount"); + } //append output files for(int i=0;iappendFiles((alns + toString(processIDS[i]) + ".temp"), alns); m->mothurRemove((alns + toString(processIDS[i]) + ".temp")); } + + if (hasCount && dups) { + if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) { + ifstream in2; + m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2); + + string name, group; + while (!in2.eof()) { + in2 >> name >> group; m->gobble(in2); + newCount.setAbund(name, group, 0); + } + in2.close(); + } + m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp"); + } + } + + //print new *.pick.count_table + if (hasCount && dups) { newCount.printTable(newCountFile); } return num; diff --git a/chimerauchimecommand.h b/chimerauchimecommand.h index 6d9d001..2237493 100644 --- a/chimerauchimecommand.h +++ b/chimerauchimecommand.h @@ -63,8 +63,8 @@ private: int readFasta(string, map&); int printFile(vector&, string); int deconvoluteResults(map&, string, string, string); - int driverGroups(string, string, string, string, int, int, vector); - int createProcessesGroups(string, string, string, string, vector, string, string, string); + int driverGroups(string, string, string, string, string, int, int, vector); + int createProcessesGroups(string, string, string, string, string, vector, string, string, string); int prepFile(string filename, string); @@ -80,17 +80,17 @@ struct uchimeData { string namefile; string groupfile; string outputFName; - string accnos, alns, filename, templatefile, uchimeLocation; + string accnos, alns, filename, templatefile, uchimeLocation, countlist; MothurOut* m; int start; int end; int threadID, count, numChimeras; vector groups; - bool useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount; + bool dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount; string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand; uchimeData(){} - uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac, string al, vector gr, MothurOut* mout, int st, int en, int tid) { + uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac, string al, string nc, vector gr, MothurOut* mout, int st, int en, int tid) { fastafile = f; namefile = n; groupfile = g; @@ -107,8 +107,9 @@ struct uchimeData { count = 0; numChimeras = 0; uchimeLocation = uloc; + countlist = nc; } - void setBooleans(bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) { + void setBooleans(bool dps, bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) { useAbskew = Abskew; chimealns = calns; useMinH = MinH; @@ -128,6 +129,7 @@ struct uchimeData { ucl = uc; useQueryfract = Queryfract; hasCount = hc; + dups = dps; } void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac, string stra) { @@ -183,6 +185,10 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){ int totalSeqs = 0; int numChimeras = 0; + + ofstream outCountList; + if (pDataArray->hasCount && pDataArray->dups) { pDataArray->m->openOutputFile(pDataArray->countlist, outCountList); } + for (int i = pDataArray->start; i < pDataArray->end; i++) { int start = time(NULL); if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; } @@ -448,9 +454,14 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){ ofstream out; pDataArray->m->openOutputFile(accnos, out); + int num = 0; numChimeras = 0; + map thisnamemap; + map::iterator itN; + if (pDataArray->dups && !pDataArray->hasCount) { thisnamemap = parser->getNameMap(pDataArray->groups[i]); } + while(!in.eof()) { if (pDataArray->m->control_pressed) { break; } @@ -466,7 +477,22 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){ for (int j = 0; j < 15; j++) { in >> chimeraFlag; } pDataArray->m->gobble(in); - if (chimeraFlag == "Y") { out << name << endl; numChimeras++; } + if (chimeraFlag == "Y") { + if (pDataArray->dups) { + if (!pDataArray->hasCount) { //output redundant names for each group + itN = thisnamemap.find(name); + if (itN != thisnamemap.end()) { + vector tempNames; pDataArray->m->splitAtComma(itN->second, tempNames); + for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; } + }else { pDataArray->m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); pDataArray->m->control_pressed = true; } + + }else { + out << name << endl; + outCountList << name << '\t' << pDataArray->groups[i] << endl; + } + }else{ out << name << endl; } + numChimeras++; + } num++; } in.close(); @@ -491,6 +517,7 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){ } + if (pDataArray->hasCount && pDataArray->dups) { outCountList.close(); } pDataArray->count = totalSeqs; if (pDataArray->hasCount) { delete cparser; } { delete parser; } return totalSeqs; diff --git a/getsharedotucommand.cpp b/getsharedotucommand.cpp index 5ae8da5..1074d32 100644 --- a/getsharedotucommand.cpp +++ b/getsharedotucommand.cpp @@ -13,13 +13,14 @@ //********************************************************************************************************************** vector GetSharedOTUCommand::setParameters(){ try { - CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false); parameters.push_back(pfasta); - CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none","sharedseq",false,true,true); parameters.push_back(pgroup); - CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","sharedseq",false,true,true); parameters.push_back(plist); + CommandParameter pfasta("fasta", "InputTypes", "", "", "sharedFasta", "none", "none","fasta",false,false); parameters.push_back(pfasta); + CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "groupList","",false,false,true); parameters.push_back(pgroup); + CommandParameter plist("list", "InputTypes", "", "", "sharedList", "sharedList", "groupList","sharedseq",false,false,true); parameters.push_back(plist); + CommandParameter pshared("shared", "InputTypes", "", "", "sharedList-sharedFasta", "sharedList", "none","sharedseq",false,false,true); parameters.push_back(pshared); CommandParameter poutput("output", "Multiple", "accnos-default", "default", "", "", "","",false,false); parameters.push_back(poutput); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); - CommandParameter punique("unique", "String", "", "", "", "", "","",false,false,true); parameters.push_back(punique); - CommandParameter pshared("shared", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pshared); + CommandParameter puniquegroups("uniquegroups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(puniquegroups); + CommandParameter psharedgroups("sharedgroups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(psharedgroups); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); @@ -36,18 +37,18 @@ vector GetSharedOTUCommand::setParameters(){ string GetSharedOTUCommand::getHelpString(){ try { string helpString = ""; - helpString += "The get.sharedseqs command parameters are list, group, label, unique, shared, output and fasta. The list and group parameters are required, unless you have valid current files.\n"; + helpString += "The get.sharedseqs command parameters are list, group, shared, label, uniquegroups, sharedgroups, output and fasta. The list and group or shared parameters are required, unless you have valid current files.\n"; helpString += "The label parameter allows you to select what distance levels you would like output files for, and are separated by dashes.\n"; - helpString += "The unique and shared parameters allow you to select groups you would like to know the shared info for, and are separated by dashes.\n"; - helpString += "If you enter your groups under the unique parameter mothur will return the otus that contain ONLY sequences from those groups.\n"; - helpString += "If you enter your groups under the shared parameter mothur will return the otus that contain sequences from those groups and may also contain sequences from other groups.\n"; - helpString += "If you do not enter any groups then the get.sharedseqs command will return sequences that are unique to all groups in your group file.\n"; - helpString += "The fasta parameter allows you to input a fasta file and outputs a fasta file for each distance level containing only the sequences that are in OTUs shared by the groups specified.\n"; + helpString += "The uniquegroups and sharedgroups parameters allow you to select groups you would like to know the shared info for, and are separated by dashes.\n"; + helpString += "If you enter your groups under the uniquegroups parameter mothur will return the otus that contain ONLY sequences from those groups.\n"; + helpString += "If you enter your groups under the sharedgroups parameter mothur will return the otus that contain sequences from those groups and may also contain sequences from other groups.\n"; + helpString += "If you do not enter any groups then the get.sharedseqs command will return sequences that are unique to all groups in your group or shared file.\n"; + helpString += "The fasta parameter allows you to input a fasta file and outputs a fasta file for each distance level containing only the sequences that are in OTUs shared by the groups specified. It can only be used with a list and group file not the shared file input.\n"; helpString += "The output parameter allows you to output the list of names without the group and bin number added. \n"; helpString += "With this option you can use the names file as an input in get.seqs and remove.seqs commands. To do this enter output=accnos. \n"; helpString += "The get.sharedseqs command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n"; - helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(list=yourListFile, group=yourGroupFile, label=yourLabels, unique=yourGroups, fasta=yourFastafile, output=yourOutput).\n"; - helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group= amazon.groups, unique=forest-pasture, fasta=amazon.fasta, output=accnos).\n"; + helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(list=yourListFile, group=yourGroupFile, label=yourLabels, uniquegroups=yourGroups, fasta=yourFastafile, output=yourOutput).\n"; + helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group=amazon.groups, uniquegroups=forest-pasture, fasta=amazon.fasta, output=accnos).\n"; helpString += "The output to the screen is the distance and the number of otus at that distance for the groups you specified.\n"; helpString += "The default value for label is all labels in your inputfile. The default for groups is all groups in your file.\n"; helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n"; @@ -145,6 +146,14 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["list"] = inputDir + it->second; } } + + it = parameters.find("shared"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["shared"] = inputDir + it->second; } + } it = parameters.find("group"); //user has given a template file @@ -159,28 +168,53 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option) { //check for required parameters listfile = validParameter.validFile(parameters, "list", true); if (listfile == "not open") { abort = true; } - else if (listfile == "not found") { - listfile = m->getListFile(); - if (listfile != "") { format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); } - else { - m->mothurOut("No valid current list file. You must provide a list file."); m->mothurOutEndLine(); - abort = true; - } - }else { format = "list"; m->setListFile(listfile); } + else if (listfile == "not found") { listfile = ""; } + else { format = "list"; m->setListFile(listfile); } groupfile = validParameter.validFile(parameters, "group", true); if (groupfile == "not open") { abort = true; } - else if (groupfile == "not found") { - groupfile = m->getGroupFile(); - if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); } - else { - m->mothurOut("No valid current group file. You must provide a group file."); m->mothurOutEndLine(); - abort = true; + else if (groupfile == "not found") { groupfile = ""; } + else { m->setGroupFile(groupfile); } + + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { abort = true; } + else if (sharedfile == "not found") { sharedfile = ""; } + else { m->setSharedFile(sharedfile); } + + fastafile = validParameter.validFile(parameters, "fasta", true); + if (fastafile == "not open") { abort = true; } + else if (fastafile == "not found") { fastafile = ""; } + else { m->setFastaFile(fastafile); } + + + if ((sharedfile == "") && (listfile == "")) { //look for currents + //is there are current file available for either of these? + //give priority to shared, then list + sharedfile = m->getSharedFile(); + if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } + else { + listfile = m->getListFile(); + if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("No valid current files. You must provide a shared or list file."); m->mothurOutEndLine(); + abort = true; + } } - }else { m->setGroupFile(groupfile); } - - if ((listfile == "") || (groupfile == "")) { m->mothurOut("The list and group parameters are required."); m->mothurOutEndLine(); abort = true; } + }else if ((sharedfile != "") && (listfile != "")) { + m->mothurOut("You may enter ONLY ONE of the following: shared or list."); m->mothurOutEndLine(); abort = true; + } + if (listfile != "") { + if (groupfile == ""){ + groupfile = m->getGroupFile(); + if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You need to provide a group file if you are going to use the list format."); m->mothurOutEndLine(); abort = true; + } + } + } + + if ((sharedfile != "") && (fastafile != "")) { m->mothurOut("You cannot use the fasta file with the shared file."); m->mothurOutEndLine(); abort = true; } + //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.validFile(parameters, "label", false); @@ -194,29 +228,22 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option) { if (output == "not found") { output = ""; } else if (output == "default") { output = ""; } - groups = validParameter.validFile(parameters, "unique", false); + groups = validParameter.validFile(parameters, "uniquegroups", false); if (groups == "not found") { groups = ""; } else { userGroups = "unique." + groups; m->splitAtDash(groups, Groups); - m->setGroups(Groups); - } - groups = validParameter.validFile(parameters, "shared", false); + groups = validParameter.validFile(parameters, "sharedgroups", false); if (groups == "not found") { groups = ""; } else { userGroups = groups; m->splitAtDash(groups, Groups); - m->setGroups(Groups); unique = false; } - fastafile = validParameter.validFile(parameters, "fasta", true); - if (fastafile == "not open") { abort = true; } - else if (fastafile == "not found") { fastafile = ""; } - else { m->setFastaFile(fastafile); } - } + } } catch(exception& e) { @@ -231,125 +258,128 @@ int GetSharedOTUCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } - groupMap = new GroupMap(groupfile); - int error = groupMap->readMap(); - if (error == 1) { delete groupMap; return 0; } - - if (m->control_pressed) { delete groupMap; return 0; } - - if (Groups.size() == 0) { - Groups = groupMap->getNamesOfGroups(); - - //make string for outputfile name - userGroups = "unique."; - for(int i = 0; i < Groups.size(); i++) { userGroups += Groups[i] + "-"; } - userGroups = userGroups.substr(0, userGroups.length()-1); - }else{ - //sanity check for group names - SharedUtil util; - vector namesOfGroups = groupMap->getNamesOfGroups(); - util.setGroups(Groups, namesOfGroups); - groupMap->setNamesOfGroups(namesOfGroups); - } - - //put groups in map to find easier - for(int i = 0; i < Groups.size(); i++) { - groupFinder[Groups[i]] = Groups[i]; - } - - if (fastafile != "") { - ifstream inFasta; - m->openInputFile(fastafile, inFasta); - - while(!inFasta.eof()) { - if (m->control_pressed) { outputTypes.clear(); inFasta.close(); delete groupMap; return 0; } - - Sequence seq(inFasta); m->gobble(inFasta); - if (seq.getName() != "") { seqs.push_back(seq); } - } - inFasta.close(); - } - - ListVector* lastlist = NULL; - string lastLabel = ""; - - //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. - set processedLabels; - set userLabels = labels; - - ifstream in; - m->openInputFile(listfile, in); - - //as long as you are not at the end of the file or done wih the lines you want - while((!in.eof()) && ((allLines == 1) || (userLabels.size() != 0))) { - - if (m->control_pressed) { - if (lastlist != NULL) { delete lastlist; } - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - delete groupMap; return 0; - } - - list = new ListVector(in); - - if(allLines == 1 || labels.count(list->getLabel()) == 1){ - m->mothurOut(list->getLabel()); - process(list); - - processedLabels.insert(list->getLabel()); - userLabels.erase(list->getLabel()); - } - - if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { - string saveLabel = list->getLabel(); - - m->mothurOut(lastlist->getLabel()); - process(lastlist); - - processedLabels.insert(lastlist->getLabel()); - userLabels.erase(lastlist->getLabel()); - - //restore real lastlabel to save below - list->setLabel(saveLabel); - } + if ( sharedfile != "") { runShared(); } + else { + m->setGroups(Groups); + groupMap = new GroupMap(groupfile); + int error = groupMap->readMap(); + if (error == 1) { delete groupMap; return 0; } + + if (m->control_pressed) { delete groupMap; return 0; } + + if (Groups.size() == 0) { + Groups = groupMap->getNamesOfGroups(); + + //make string for outputfile name + userGroups = "unique."; + for(int i = 0; i < Groups.size(); i++) { userGroups += Groups[i] + "-"; } + userGroups = userGroups.substr(0, userGroups.length()-1); + }else{ + //sanity check for group names + SharedUtil util; + vector namesOfGroups = groupMap->getNamesOfGroups(); + util.setGroups(Groups, namesOfGroups); + groupMap->setNamesOfGroups(namesOfGroups); + } + + //put groups in map to find easier + for(int i = 0; i < Groups.size(); i++) { + groupFinder[Groups[i]] = Groups[i]; + } + + if (fastafile != "") { + ifstream inFasta; + m->openInputFile(fastafile, inFasta); + + while(!inFasta.eof()) { + if (m->control_pressed) { outputTypes.clear(); inFasta.close(); delete groupMap; return 0; } + + Sequence seq(inFasta); m->gobble(inFasta); + if (seq.getName() != "") { seqs.push_back(seq); } + } + inFasta.close(); + } + + ListVector* lastlist = NULL; + string lastLabel = ""; + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + ifstream in; + m->openInputFile(listfile, in); + + //as long as you are not at the end of the file or done wih the lines you want + while((!in.eof()) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { + if (lastlist != NULL) { delete lastlist; } + for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); + delete groupMap; return 0; + } + + list = new ListVector(in); + + if(allLines == 1 || labels.count(list->getLabel()) == 1){ + m->mothurOut(list->getLabel()); + process(list); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + } + + if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + + m->mothurOut(lastlist->getLabel()); + process(lastlist); + + processedLabels.insert(lastlist->getLabel()); + userLabels.erase(lastlist->getLabel()); + + //restore real lastlabel to save below + list->setLabel(saveLabel); + } - lastLabel = list->getLabel(); - - if (lastlist != NULL) { delete lastlist; } - lastlist = list; - } - - in.close(); - - //output error messages about any remaining user labels - set::iterator it; - bool needToRun = false; - for (it = userLabels.begin(); it != userLabels.end(); it++) { - m->mothurOut("Your file does not include the label " + *it); - if (processedLabels.count(lastLabel) != 1) { - m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); - needToRun = true; - }else { - m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); - } - } - - //run last label if you need to - if (needToRun == true) { - m->mothurOut(lastlist->getLabel()); - process(lastlist); - - processedLabels.insert(lastlist->getLabel()); - userLabels.erase(lastlist->getLabel()); - } - + lastLabel = list->getLabel(); + + if (lastlist != NULL) { delete lastlist; } + lastlist = list; + } + + in.close(); + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + m->mothurOut(lastlist->getLabel()); + process(lastlist); + + processedLabels.insert(lastlist->getLabel()); + userLabels.erase(lastlist->getLabel()); + } + - //reset groups parameter - m->clearGroups(); - - if (lastlist != NULL) { delete lastlist; } - - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete groupMap; return 0; } - + //reset groups parameter + m->clearGroups(); + + if (lastlist != NULL) { delete lastlist; } + + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete groupMap; return 0; } + } //set fasta file as new current fastafile string current = ""; itTypes = outputTypes.find("fasta"); @@ -521,5 +551,206 @@ int GetSharedOTUCommand::process(ListVector* shared) { exit(1); } } +/***********************************************************/ +int GetSharedOTUCommand::runShared() { + try { + InputData input(sharedfile, "sharedfile"); + vector lookup = input.getSharedRAbundVectors(); + string lastLabel = lookup[0]->getLabel(); + + if (Groups.size() == 0) { + Groups = m->getGroups(); + + //make string for outputfile name + userGroups = "unique."; + for(int i = 0; i < Groups.size(); i++) { userGroups += Groups[i] + "-"; } + userGroups = userGroups.substr(0, userGroups.length()-1); + }else { + //sanity check for group names + SharedUtil util; + vector allGroups = m->getAllGroups(); + util.setGroups(Groups, allGroups); + } + + //put groups in map to find easier + for(int i = 0; i < Groups.size(); i++) { + groupFinder[Groups[i]] = Groups[i]; + } + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + if (m->control_pressed) { + outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0; + } + + + if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ + m->mothurOut(lookup[0]->getLabel()); + process(lookup); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + } + + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(lastLabel); + + m->mothurOut(lookup[0]->getLabel()); + process(lookup); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); + } + + lastLabel = lookup[0]->getLabel(); + + //get next line to process + //prevent memory leak + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(); + } + + if (m->control_pressed) { + outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0; + } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } + lookup = input.getSharedRAbundVectors(lastLabel); + + m->mothurOut(lookup[0]->getLabel()); + process(lookup); + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + } + + //reset groups parameter + m->clearGroups(); + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GetSharedOTUCommand", "runShared"); + exit(1); + } +} +/***********************************************************/ +int GetSharedOTUCommand::process(vector& lookup) { + try { + + string outputFileNames; + if (outputDir == "") { outputDir += m->hasPath(sharedfile); } + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); + variables["[distance]"] = lookup[0]->getLabel(); + variables["[group]"] = userGroups; + if (output != "accnos") { outputFileNames = getOutputFileName("sharedseqs", variables); } + else { outputFileNames = getOutputFileName("accnos", variables); } + + ofstream outNames; + m->openOutputFile(outputFileNames, outNames); + + bool wroteSomething = false; + int num = 0; + + //go through each bin, find out if shared + for (int i = 0; i < lookup[0]->getNumBins(); i++) { + if (m->control_pressed) { outNames.close(); m->mothurRemove(outputFileNames); return 0; } + + bool uniqueOTU = true; + map atLeastOne; + for (int f = 0; f < Groups.size(); f++) { atLeastOne[Groups[f]] = 0; } + + set namesOfGroupsInThisBin; + + for(int j = 0; j < lookup.size(); j++) { + string seqGroup = lookup[j]->getGroup(); + string name = m->currentBinLabels[i]; + + if (lookup[j]->getAbundance(i) != 0) { + if (output != "accnos") { + namesOfGroupsInThisBin.insert(name + "|" + seqGroup + "|" + toString(lookup[j]->getAbundance(i))); + }else { namesOfGroupsInThisBin.insert(name); } + + //is this seq in one of the groups we care about + it = groupFinder.find(seqGroup); + if (it == groupFinder.end()) { uniqueOTU = false; } //you have sequences from a group you don't want + else { atLeastOne[seqGroup]++; } + } + } + + //make sure you have at least one seq from each group you want + bool sharedByAll = true; + map::iterator it2; + for (it2 = atLeastOne.begin(); it2 != atLeastOne.end(); it2++) { + if (it2->second == 0) { sharedByAll = false; } + } + + //if the user wants unique bins and this is unique then print + //or this the user wants shared bins and this bin is shared then print + if ((unique && uniqueOTU && sharedByAll) || (!unique && sharedByAll)) { + + wroteSomething = true; + num++; + + //output list of names + for (set::iterator itNames = namesOfGroupsInThisBin.begin(); itNames != namesOfGroupsInThisBin.end(); itNames++) { + outNames << (*itNames) << endl; + } + } + } + outNames.close(); + + if (!wroteSomething) { + m->mothurRemove(outputFileNames); + string outputString = "\t" + toString(num) + " - No otus shared by groups"; + + string groupString = ""; + for (int h = 0; h < Groups.size(); h++) { + groupString += " " + Groups[h]; + } + + outputString += groupString + "."; + m->mothurOut(outputString); m->mothurOutEndLine(); + }else { + m->mothurOut("\t" + toString(num)); m->mothurOutEndLine(); + outputNames.push_back(outputFileNames); + if (output != "accnos") { outputTypes["sharedseqs"].push_back(outputFileNames); } + else { outputTypes["accnos"].push_back(outputFileNames); } + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "GetSharedOTUCommand", "process"); + exit(1); + } +} //********************************************************************************************************************** diff --git a/getsharedotucommand.h b/getsharedotucommand.h index 583c2e0..f03b5dd 100644 --- a/getsharedotucommand.h +++ b/getsharedotucommand.h @@ -14,6 +14,8 @@ #include "listvector.hpp" #include "sequence.hpp" #include "groupmap.h" +#include "sharedrabundvector.h" +#include "inputdata.h" //********************************************************************************************************************** class GetSharedOTUCommand : public Command { @@ -44,7 +46,7 @@ class GetSharedOTUCommand : public Command { GroupMap* groupMap; set labels; - string fastafile, label, groups, listfile, groupfile, output, userGroups, outputDir, format; + string fastafile, label, groups, listfile, groupfile, sharedfile, output, userGroups, outputDir, format; bool abort, allLines, unique; vector Groups; map groupFinder; @@ -53,6 +55,8 @@ class GetSharedOTUCommand : public Command { vector outputNames; int process(ListVector*); + int process(vector&); + int runShared(); }; //********************************************************************************************************************** diff --git a/seqerrorcommand.cpp b/seqerrorcommand.cpp index 0a6eae9..641a3ab 100644 --- a/seqerrorcommand.cpp +++ b/seqerrorcommand.cpp @@ -407,6 +407,13 @@ int SeqErrorCommand::execute(){ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); + //set fasta file as new current fastafile + string current = ""; + itTypes = outputTypes.find("errorseq"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } + } + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index 61b19b8..7ecdf74 100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -489,7 +489,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN if (pos == string::npos) { flowData.printFlows(trimFlowFile); - if(fasta) { currSeq.printSequence(fastaFile); } + if(fasta) { currSeq.setAligned(currSeq.getUnaligned()); currSeq.printSequence(fastaFile); } if(allFiles){ ofstream output; diff --git a/trimflowscommand.h b/trimflowscommand.h index 065e6b4..f5493eb 100644 --- a/trimflowscommand.h +++ b/trimflowscommand.h @@ -224,7 +224,7 @@ static DWORD WINAPI MyTrimFlowThreadFunction(LPVOID lpParam){ flowData.printFlows(trimFlowFile); - if(pDataArray->fasta) { currSeq.printSequence(fastaFile); } + if(pDataArray->fasta) { currSeq.setAligned(currSeq.getUnaligned()); currSeq.printSequence(fastaFile); } if(pDataArray->allFiles){ ofstream output; diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 709202f..4b90cda 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -684,15 +684,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string //create reoriented primer and barcode pairs map rpairedPrimers, rpairedBarcodes; for (map::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) { - cout << "primer " << (it->second).forward << '\t' << (it->second).reverse << '\t' << primerNameVector[it->first] << endl; - cout << "rprimer " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl; - oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer + oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer rpairedPrimers[it->first] = tempPair; } for (map::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) { - cout << "barcode " << (it->second).forward << '\t' << (it->second).reverse << '\t' << barcodeNameVector[it->first] << endl; - cout << "rbarcode " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl; - oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode + oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode rpairedBarcodes[it->first] = tempPair; } rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size(); @@ -1561,6 +1557,8 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< } oligosPair newPrimer(oligo, roligo); + + if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); } //check for repeat barcodes string tempPair = oligo+roligo; @@ -1604,7 +1602,8 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; } } - oligosPair newPair(oligo, reverseOligo(reverseBarcode)); + reverseBarcode = reverseOligo(reverseBarcode); + oligosPair newPair(oligo, reverseBarcode); if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); } diff --git a/trimseqscommand.h b/trimseqscommand.h index 4d0a611..882f716 100644 --- a/trimseqscommand.h +++ b/trimseqscommand.h @@ -261,7 +261,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ TrimOligos* trimOligos = NULL; int numBarcodes = pDataArray->barcodes.size(); - if (pDataArray->pairedOligos) { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes); numBarcodes = pDataArray->pairedBarcodes.size(); } + if (pDataArray->pairedOligos) { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes); numBarcodes = pDataArray->pairedBarcodes.size(); pDataArray->numFPrimers = pDataArray->pairedPrimers.size(); } else { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer); } TrimOligos* rtrimOligos = NULL; @@ -283,7 +283,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process if (pDataArray->m->control_pressed) { - delete trimOligos; + delete trimOligos; if (pDataArray->reorient) { delete rtrimOligos; } inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close(); if ((pDataArray->createGroup) && (pDataArray->countfile == "")) { outGroupsFile.close(); } if(pDataArray->qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); } @@ -372,6 +372,10 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ currentSeqsDiffs = thisCurrentSeqsDiffs; barcodeIndex = thisBarcodeIndex; primerIndex = thisPrimerIndex; + currSeq.reverseComplement(); + if(pDataArray->qFileName != ""){ + currQual.flipQScores(); + } } } @@ -567,12 +571,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ } //report progress - if((i) % 1000 == 0){ pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine(); } + if((pDataArray->count) % 1000 == 0){ pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); } } //report progress if((pDataArray->count) % 1000 != 0){ pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); } + if (pDataArray->reorient) { delete rtrimOligos; } delete trimOligos; inFASTA.close(); trimFASTAFile.close(); -- 2.39.2