X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=chimeraslayercommand.cpp;h=d16843c8d612fac2bde8c03db8e45ce68e869412;hp=a29fc82727eb0d68d8d8a071d2e969dfab8d2103;hb=1a20e24ee786195ab0e1cccd4f5aede7a88f3f4e;hpb=deba0af0ccdcb6005ed5b2b82649b137c63fbdf7 diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index a29fc82..d16843c 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -31,12 +31,14 @@ vector ChimeraSlayerCommand::setParameters(){ CommandParameter pminbs("minbs", "Number", "", "90", "", "", "","",false,false); parameters.push_back(pminbs); CommandParameter psearch("search", "Multiple", "kmer-blast", "blast", "", "", "","",false,false); parameters.push_back(psearch); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + CommandParameter prealign("realign", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(prealign); CommandParameter ptrim("trim", "Boolean", "", "F", "", "", "","fasta",false,false); parameters.push_back(ptrim); CommandParameter psplit("split", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psplit); CommandParameter pnumwanted("numwanted", "Number", "", "15", "", "", "","",false,false); parameters.push_back(pnumwanted); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pdivergence("divergence", "Number", "", "1.007", "", "", "","",false,false); parameters.push_back(pdivergence); + CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups); CommandParameter pparents("parents", "Number", "", "3", "", "", "","",false,false); parameters.push_back(pparents); CommandParameter pincrement("increment", "Number", "", "5", "", "", "","",false,false); parameters.push_back(pincrement); CommandParameter pblastlocation("blastlocation", "String", "", "", "", "", "","",false,false); parameters.push_back(pblastlocation); @@ -59,17 +61,18 @@ string ChimeraSlayerCommand::getHelpString(){ string helpString = ""; helpString += "The chimera.slayer command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n"; helpString += "This command was modeled after the chimeraSlayer written by the Broad Institute.\n"; - helpString += "The chimera.slayer command parameters are fasta, name, group, template, processors, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment, numwanted, blastlocation and realign.\n"; + helpString += "The chimera.slayer command parameters are fasta, name, group, template, processors, dereplicate, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment, numwanted, blastlocation and realign.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The name parameter allows you to provide a name file, if you are using reference=self. \n"; helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; - helpString += "The count parameter allows you to provide a count file. The count file reference=self. If your count file contains group information, when checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; + helpString += "The count parameter allows you to provide a count file. The count file reference=self. If your count file contains group information, when checking sequences, only sequences from the same group as the query sequence will be used as the reference. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n"; helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n"; helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; #ifdef USE_MPI helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n"; #endif + helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n"; helpString += "The trim parameter allows you to output a new fasta file containing your sequences with the chimeric ones trimmed to include only their longest piece, default=F. \n"; helpString += "The split parameter allows you to check both pieces of non-chimeric sequence for chimeras, thus looking for trimeras and quadmeras. default=F. \n"; helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=50. \n"; @@ -107,7 +110,8 @@ string ChimeraSlayerCommand::getOutputPattern(string type) { if (type == "chimera") { pattern = "[filename],slayer.chimeras"; } else if (type == "accnos") { pattern = "[filename],slayer.accnos"; } - else if (type == "fasta") { pattern = "[filename],slayer.fasta"; } + else if (type == "fasta") { pattern = "[filename],slayer.fasta"; } + else if (type == "count") { pattern = "[filename],slayer.pick.count_table"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } return pattern; @@ -126,6 +130,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(){ outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["fasta"] = tempOutNames; + outputTypes["count"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand"); @@ -162,6 +167,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["fasta"] = tempOutNames; + outputTypes["count"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -595,6 +601,10 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { temp = validParameter.validFile(parameters, "numwanted", false); if (temp == "not found") { temp = "15"; } m->mothurConvert(temp, numwanted); + + temp = validParameter.validFile(parameters, "dereplicate", false); + if (temp == "not found") { temp = "false"; } + dups = m->isTrue(temp); blastlocation = validParameter.validFile(parameters, "blastlocation", false); if (blastlocation == "not found") { blastlocation = ""; } @@ -667,6 +677,7 @@ int ChimeraSlayerCommand::execute(){ string outputFileName = getOutputFileName("chimera", variables); string accnosFileName = getOutputFileName("accnos", variables); string trimFastaFileName = getOutputFileName("fasta", variables); + string newCountFile = ""; //clears files ofstream out, out1, out2; @@ -736,11 +747,36 @@ int ChimeraSlayerCommand::execute(){ if (m->control_pressed) { outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } #endif }else { //you have provided a groupfile + string countFile = ""; + if (hasCount) { + countFile = nameFileNames[s]; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s])); + newCountFile = getOutputFileName("count", variables); + } #ifdef USE_MPI - MPIExecuteGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup); + MPIExecuteGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup, newCountFile, countFile); #else - if (processors == 1) { numSeqs = driverGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup); } - else { numSeqs = createProcessesGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup); } //destroys fileToPriority + if (processors == 1) { + numSeqs = driverGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup, newCountFile); + if (hasCount && dups) { + CountTable c; c.readTable(nameFileNames[s]); + if (!m->isBlank(newCountFile)) { + ifstream in2; + m->openInputFile(newCountFile, in2); + + string name, group; + while (!in2.eof()) { + in2 >> name >> group; m->gobble(in2); + c.setAbund(name, group, 0); + } + in2.close(); + } + m->mothurRemove(newCountFile); + c.printTable(newCountFile); + } + + } + else { numSeqs = createProcessesGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup, newCountFile, countFile); } //destroys fileToPriority #endif #ifdef USE_MPI @@ -749,8 +785,32 @@ int ChimeraSlayerCommand::execute(){ if (pid == 0) { #endif - totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, trimFastaFileName); - m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine(); + if (!dups) { + totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, trimFastaFileName); + m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine(); + }else { + if (hasCount) { + set doNotRemove; + CountTable c; c.readTable(newCountFile); + vector namesInTable = c.getNamesOfSeqs(); + for (int i = 0; i < namesInTable.size(); i++) { + int temp = c.getNumSeqs(namesInTable[i]); + if (temp == 0) { c.remove(namesInTable[i]); } + else { doNotRemove.insert((namesInTable[i])); } + } + //remove names we want to keep from accnos file. + set accnosNames = m->readAccnos(accnosFileName); + ofstream out2; + m->openOutputFile(accnosFileName, out2); + for (set::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) { + if (doNotRemove.count(*it) == 0) { out2 << (*it) << endl; } + } + out2.close(); + c.printTable(newCountFile); + outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile); + } + } + #ifdef USE_MPI } MPI_Barrier(MPI_COMM_WORLD); //make everyone wait @@ -774,6 +834,11 @@ int ChimeraSlayerCommand::execute(){ } } + itTypes = outputTypes.find("count"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } + } + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -788,7 +853,7 @@ int ChimeraSlayerCommand::execute(){ } } //********************************************************************************************************************** -int ChimeraSlayerCommand::MPIExecuteGroups(string outputFileName, string accnosFileName, string trimFastaFileName, map >& fileToPriority, map& fileGroup){ +int ChimeraSlayerCommand::MPIExecuteGroups(string outputFileName, string accnosFileName, string trimFastaFileName, map >& fileToPriority, map& fileGroup, string countlist, string countfile){ try { #ifdef USE_MPI int pid; @@ -814,6 +879,7 @@ int ChimeraSlayerCommand::MPIExecuteGroups(string outputFileName, string accnosF MPI_File outMPI; MPI_File outMPIAccnos; MPI_File outMPIFasta; + MPI_File outMPICount; int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; @@ -826,12 +892,16 @@ int ChimeraSlayerCommand::MPIExecuteGroups(string outputFileName, string accnosF char outFastaFilename[1024]; strcpy(outFastaFilename, trimFastaFileName.c_str()); + + char outCountFilename[1024]; + strcpy(outCountFilename, countlist.c_str()); MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); } + if (hasCount && dups) { MPI_File_open(MPI_COMM_WORLD, outCountFilename, outMode, MPI_INFO_NULL, &outMPICount); } - if (m->control_pressed) { MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); return 0; } + if (m->control_pressed) { MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); if (hasCount && dups) { MPI_File_close(&outMPICount); } return 0; } //print headers if (pid == 0) { //you are the root process @@ -862,16 +932,55 @@ int ChimeraSlayerCommand::MPIExecuteGroups(string outputFileName, string accnosF strcpy(inFileName, thisFastaName.c_str()); MPI_File inMPI; MPI_File_open(MPI_COMM_SELF, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer - + MPIPos = m->setFilePosFasta(thisFastaName, num); //fills MPIPos, returns numSeqs cout << endl << "Checking sequences from group: " << fileGroup[thisFastaName] << "." << endl; - driverMPI(0, num, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos, thisFastaName, thisPriority, true); + set cnames; + driverMPI(0, num, inMPI, outMPI, outMPIAccnos, outMPIFasta, cnames, MPIPos, thisFastaName, thisPriority, true); numSeqs += num; MPI_File_close(&inMPI); m->mothurRemove(thisFastaName); + + if (dups) { + if (cnames.size() != 0) { + if (hasCount) { + for (set::iterator it = cnames.begin(); it != cnames.end(); it++) { + string outputString = (*it) + "\t" + fileGroup[thisFastaName] + "\n"; + int length = outputString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outputString.c_str(), length); + MPI_File_write_shared(outMPICount, buf2, length, MPI_CHAR, &status); + delete buf2; + } + }else { + map >::iterator itGroupNameMap = group2NameMap.find(fileGroup[thisFastaName]); + if (itGroupNameMap != group2NameMap.end()) { + map thisnamemap = itGroupNameMap->second; + map::iterator itN; + for (set::iterator it = cnames.begin(); it != cnames.end(); it++) { + itN = thisnamemap.find(*it); + if (itN != thisnamemap.end()) { + vector tempNames; m->splitAtComma(itN->second, tempNames); + for (int j = 0; j < tempNames.size(); j++) { //write to accnos file + string outputString = tempNames[j] + "\n"; + int length = outputString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outputString.c_str(), length); + + MPI_File_write_shared(outMPIAccnos, buf2, length, MPI_CHAR, &status); + delete buf2; + } + + }else { m->mothurOut("[ERROR]: parsing cannot find " + *it + ".\n"); m->control_pressed = true; } + } + }else { m->mothurOut("[ERROR]: parsing cannot find " + fileGroup[thisFastaName] + ".\n"); m->control_pressed = true; } + } + + } + } cout << endl << "It took " << toString(time(NULL) - start) << " secs to check " + toString(num) + " sequences from group " << fileGroup[thisFastaName] << "." << endl; } @@ -887,6 +996,7 @@ int ChimeraSlayerCommand::MPIExecuteGroups(string outputFileName, string accnosF MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); if (trim) { MPI_File_close(&outMPIFasta); } + if (hasCount && dups) { MPI_File_close(&outMPICount); } MPI_Barrier(MPI_COMM_WORLD); //make everyone wait #endif @@ -972,7 +1082,8 @@ int ChimeraSlayerCommand::MPIExecute(string inputFile, string outputFileName, st } //do your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos, inputFile, priority, false); + set cnames; + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, cnames, MPIPos, inputFile, priority, false); if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); return 0; } @@ -988,7 +1099,8 @@ int ChimeraSlayerCommand::MPIExecute(string inputFile, string outputFileName, st if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } //do your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos, inputFile, priority, false); + set cnames; + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, cnames, MPIPos, inputFile, priority, false); if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); return 0; } @@ -1246,6 +1358,7 @@ int ChimeraSlayerCommand::setUpForSelfReference(SequenceParser*& parser, map thisGroupsSeqs = parser->getSeqs(groups[i]); map thisGroupsMap = parser->getNameMap(groups[i]); + group2NameMap[groups[i]] = thisGroupsMap; string newFastaFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + groups[i] + "-sortedTemp.fasta"; priority = sortFastaFile(thisGroupsSeqs, thisGroupsMap, newFastaFile); fileToPriority[newFastaFile] = priority; @@ -1340,9 +1453,12 @@ string ChimeraSlayerCommand::getNamesFile(string& inputFile){ } //********************************************************************************************************************** -int ChimeraSlayerCommand::driverGroups(string outputFName, string accnos, string fasta, map >& fileToPriority, map& fileGroup){ +int ChimeraSlayerCommand::driverGroups(string outputFName, string accnos, string fasta, map >& fileToPriority, map& fileGroup, string countlist){ try { int totalSeqs = 0; + ofstream outCountList; + + if (hasCount && dups) { m->openOutputFile(countlist, outCountList); } for (map >::iterator itFile = fileToPriority.begin(); itFile != fileToPriority.end(); itFile++) { @@ -1367,6 +1483,44 @@ int ChimeraSlayerCommand::driverGroups(string outputFName, string accnos, string #endif int numSeqs = driver(lines[0], thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName, thisPriority); + //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table + //This table will zero out group counts for seqs determined to be chimeric by that group. + if (dups) { + if (!m->isBlank(thisaccnosFileName)) { + ifstream in; + m->openInputFile(thisaccnosFileName, in); + string name; + if (hasCount) { + while (!in.eof()) { + in >> name; m->gobble(in); + outCountList << name << '\t' << fileGroup[thisFastaName] << endl; + } + in.close(); + }else { + map >::iterator itGroupNameMap = group2NameMap.find(fileGroup[thisFastaName]); + if (itGroupNameMap != group2NameMap.end()) { + map thisnamemap = itGroupNameMap->second; + map::iterator itN; + ofstream out; + m->openOutputFile(thisaccnosFileName+".temp", out); + while (!in.eof()) { + in >> name; m->gobble(in); + itN = thisnamemap.find(name); + if (itN != thisnamemap.end()) { + vector tempNames; m->splitAtComma(itN->second, tempNames); + for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; } + + }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; } + } + out.close(); + in.close(); + m->renameFile(thisaccnosFileName+".temp", thisaccnosFileName); + }else { m->mothurOut("[ERROR]: parsing cannot find " + fileGroup[thisFastaName] + ".\n"); m->control_pressed = true; } + } + + } + } + //append files m->appendFiles(thisoutputFileName, outputFName); m->mothurRemove(thisoutputFileName); m->appendFiles(thisaccnosFileName, accnos); m->mothurRemove(thisaccnosFileName); @@ -1378,6 +1532,8 @@ int ChimeraSlayerCommand::driverGroups(string outputFName, string accnos, string m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + fileGroup[thisFastaName] + "."); m->mothurOutEndLine(); } + if (hasCount && dups) { outCountList.close(); } + return totalSeqs; } catch(exception& e) { @@ -1386,13 +1542,16 @@ int ChimeraSlayerCommand::driverGroups(string outputFName, string accnos, string } } /**************************************************************************************************/ -int ChimeraSlayerCommand::createProcessesGroups(string outputFName, string accnos, string fasta, map >& fileToPriority, map& fileGroup) { +int ChimeraSlayerCommand::createProcessesGroups(string outputFName, string accnos, string fasta, map >& fileToPriority, map& fileGroup, string countlist, string countFile) { try { int process = 1; int num = 0; processIDS.clear(); if (fileToPriority.size() < processors) { processors = fileToPriority.size(); } + + CountTable newCount; + if (hasCount && dups) { newCount.readTable(countFile); } int groupsPerProcessor = fileToPriority.size() / processors; int remainder = fileToPriority.size() % processors; @@ -1424,7 +1583,7 @@ int ChimeraSlayerCommand::createProcessesGroups(string outputFName, string accno processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp", breakUp[process], fileGroup); + num = driverGroups(outputFName + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp", breakUp[process], fileGroup, accnos + toString(getpid()) + ".byCount"); //pass numSeqs to parent ofstream out; @@ -1440,7 +1599,7 @@ int ChimeraSlayerCommand::createProcessesGroups(string outputFName, string accno } } - num = driverGroups(outputFName, accnos, fasta, breakUp[0], fileGroup); + num = driverGroups(outputFName, accnos, fasta, breakUp[0], fileGroup, accnos + ".byCount"); //force parent to wait until all the processes are done for (int i=0;ifileToPriority.size() != pDataArray[i]->end) { + m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->end) + " of " + toString(pDataArray[i]->fileToPriority.size()) + " groups assigned to it, quitting. \n"); m->control_pressed = true; + } num += pDataArray[i]->count; CloseHandle(hThreadArray[i]); delete pDataArray[i]; } #endif + //read my own + if (hasCount && dups) { + if (!m->isBlank(accnos + ".byCount")) { + ifstream in2; + m->openInputFile(accnos + ".byCount", in2); + + string name, group; + while (!in2.eof()) { + in2 >> name >> group; m->gobble(in2); + newCount.setAbund(name, group, 0); + } + in2.close(); + } + m->mothurRemove(accnos + ".byCount"); + } + //append output files for(int i=0;iappendFiles((fasta + toString(processIDS[i]) + ".temp"), fasta); m->mothurRemove((fasta + toString(processIDS[i]) + ".temp")); } + + if (hasCount && dups) { + if (!m->isBlank(accnos + toString(processIDS[i]) + ".byCount")) { + ifstream in2; + m->openInputFile(accnos + toString(processIDS[i]) + ".byCount", in2); + + string name, group; + while (!in2.eof()) { + in2 >> name >> group; m->gobble(in2); + newCount.setAbund(name, group, 0); + } + in2.close(); + } + m->mothurRemove(accnos + toString(processIDS[i]) + ".byCount"); + } + } + //print new *.pick.count_table + if (hasCount && dups) { newCount.printTable(countlist); } return num; } @@ -1652,7 +1848,7 @@ int ChimeraSlayerCommand::driver(linePair filePos, string outputFName, string fi } //********************************************************************************************************************** #ifdef USE_MPI -int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, MPI_File& outFastaMPI, vector& MPIPos, string filename, map& priority, bool byGroup){ +int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, MPI_File& outFastaMPI, set& cnames, vector& MPIPos, string filename, map& priority, bool byGroup){ try { MPI_Status status; int pid; @@ -1736,7 +1932,10 @@ int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fil data_results rightResults = chimera->getResults(); //if either piece is chimeric then report - Sequence trimmed = chimera->print(outMPI, outAccMPI, leftResults, rightResults); + bool flag = false; + Sequence trimmed = chimera->print(outMPI, outAccMPI, leftResults, rightResults, flag); + if (flag) { cnames.insert(candidateSeq->getName()); } + if (trim) { string outputString = ">" + trimmed.getName() + "\n" + trimmed.getAligned() + "\n"; @@ -1754,6 +1953,7 @@ int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fil }else { //print results Sequence trimmed = chimera->print(outMPI, outAccMPI); + cnames.insert(candidateSeq->getName()); if (trim) { string outputString = ">" + trimmed.getName() + "\n" + trimmed.getAligned() + "\n";