From 3094cb29c613d9687e861e1d0cf9104b7141d24e Mon Sep 17 00:00:00 2001 From: westcott Date: Mon, 14 Nov 2011 20:28:59 +0000 Subject: [PATCH] finished shhh.seqs command, fixed bug with remove.groups and get.groups that caused file mismatch if column 1 of the names file was not chosen, added warning to sequence class about protein sequences. --- chimerauchimecommand.h | 2 +- getgroupscommand.cpp | 33 ++++++++++++++++ getgroupscommand.h | 3 ++ removegroupscommand.cpp | 36 ++++++++++++++--- removegroupscommand.h | 4 ++ screenseqscommand.cpp | 87 +++++++++++++++++++++++++++++++++++++++-- screenseqscommand.h | 3 +- sequence.cpp | 46 ++++++++++++++++------ sequence.hpp | 4 +- shhhseqscommand.cpp | 73 +++++++++++++++++++++++++--------- shhhseqscommand.h | 14 +++---- trimflowscommand.cpp | 4 +- 12 files changed, 258 insertions(+), 51 deletions(-) diff --git a/chimerauchimecommand.h b/chimerauchimecommand.h index 6e1f809..f6e0ab5 100644 --- a/chimerauchimecommand.h +++ b/chimerauchimecommand.h @@ -181,7 +181,7 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){ string path = pDataArray->m->argv; string tempPath = path; - for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); } + for (int j = 0; j < path.length(); j++) { tempPath[j] = tolower(path[j]); } path = path.substr(0, (tempPath.find_last_of('m'))); string uchimeCommand = path; diff --git a/getgroupscommand.cpp b/getgroupscommand.cpp index e8f42dd..e533bc4 100644 --- a/getgroupscommand.cpp +++ b/getgroupscommand.cpp @@ -384,6 +384,15 @@ int GetGroupsCommand::readFasta(){ currSeq.printSequence(out); selectedCount++; + }else{ + //if you are not in the accnos file check if you are a name that needs to be changed + map::iterator it = uniqueToRedundant.find(name); + if (it != uniqueToRedundant.end()) { + wroteSomething = true; + currSeq.setName(it->second); + currSeq.printSequence(out); + selectedCount++; + } } } m->gobble(in); @@ -499,10 +508,26 @@ int GetGroupsCommand::readList(){ //if that name is in the .accnos file, add it if (names.count(name) != 0) { newNames += name + ","; selectedCount++; } + else{ + //if you are not in the accnos file check if you are a name that needs to be changed + map::iterator it = uniqueToRedundant.find(name); + if (it != uniqueToRedundant.end()) { + newNames += it->second + ","; + selectedCount++; + } + } } //get last name if (names.count(binnames) != 0) { newNames += binnames + ","; selectedCount++; } + else{ + //if you are not in the accnos file check if you are a name that needs to be changed + map::iterator it = uniqueToRedundant.find(binnames); + if (it != uniqueToRedundant.end()) { + newNames += it->second + ","; + selectedCount++; + } + } //if there are names in this bin add to new list if (newNames != "") { @@ -594,6 +619,7 @@ int GetGroupsCommand::readName(){ //you know you have at least one valid second since first column is valid for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; } out << validSecond[validSecond.size()-1] << endl; + uniqueToRedundant[firstCol] = validSecond[0]; } } @@ -687,6 +713,13 @@ int GetGroupsCommand::readTax(){ if (names.count(name) != 0) { wroteSomething = true; out << name << '\t' << tax << endl; + }else{ + //if you are not in the accnos file check if you are a name that needs to be changed + map::iterator it = uniqueToRedundant.find(name); + if (it != uniqueToRedundant.end()) { + wroteSomething = true; + out << it->second << '\t' << tax << endl; + } } m->gobble(in); diff --git a/getgroupscommand.h b/getgroupscommand.h index 6d766fb..e0fbad8 100644 --- a/getgroupscommand.h +++ b/getgroupscommand.h @@ -36,6 +36,9 @@ public: private: set names; + map uniqueToRedundant; //if a namefile is given and the first column name is not selected + //then the other files need to change the unique name in their file to match. + //only add the names that need to be changed to keep the map search quick string accnosfile, fastafile, namefile, groupfile, listfile, taxfile, outputDir, groups, sharedfile; bool abort; vector outputNames, Groups; diff --git a/removegroupscommand.cpp b/removegroupscommand.cpp index 725efaf..6ec0fcd 100644 --- a/removegroupscommand.cpp +++ b/removegroupscommand.cpp @@ -379,9 +379,16 @@ int RemoveGroupsCommand::readFasta(){ //if this name is in the accnos file if (names.count(name) == 0) { wroteSomething = true; - - currSeq.printSequence(out); - }else { removedCount++; } + currSeq.printSequence(out); + }else { + //if you are not in the accnos file check if you are a name that needs to be changed + map::iterator it = uniqueToRedundant.find(name); + if (it != uniqueToRedundant.end()) { + wroteSomething = true; + currSeq.setName(it->second); + currSeq.printSequence(out); + }else { removedCount++; } + } } m->gobble(in); } @@ -527,12 +534,23 @@ int RemoveGroupsCommand::readList(){ //if that name is in the .accnos file, add it if (names.count(name) == 0) { newNames += name + ","; } - else { removedCount++; } + else { + //if you are not in the accnos file check if you are a name that needs to be changed + map::iterator it = uniqueToRedundant.find(name); + if (it != uniqueToRedundant.end()) { + newNames += it->second + ","; + }else { removedCount++; } + } } //get last name if (names.count(binnames) == 0) { newNames += binnames + ","; } - else { removedCount++; } + else { //if you are not in the accnos file check if you are a name that needs to be changed + map::iterator it = uniqueToRedundant.find(binnames); + if (it != uniqueToRedundant.end()) { + newNames += it->second + ","; + }else { removedCount++; } + } //if there are names in this bin add to new list if (newNames != "") { @@ -624,6 +642,7 @@ int RemoveGroupsCommand::readName(){ //you know you have at least one valid second since first column is valid for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; } out << validSecond[validSecond.size()-1] << endl; + uniqueToRedundant[firstCol] = validSecond[0]; } } @@ -718,7 +737,12 @@ int RemoveGroupsCommand::readTax(){ if (names.count(name) == 0) { wroteSomething = true; out << name << '\t' << tax << endl; - }else { removedCount++; } + }else { //if you are not in the accnos file check if you are a name that needs to be changed + map::iterator it = uniqueToRedundant.find(name); + if (it != uniqueToRedundant.end()) { + wroteSomething = true; + out << it->second << '\t' << tax << endl; + }else { removedCount++; } } m->gobble(in); } diff --git a/removegroupscommand.h b/removegroupscommand.h index 63c0bff..103ab66 100644 --- a/removegroupscommand.h +++ b/removegroupscommand.h @@ -39,6 +39,10 @@ private: bool abort; vector outputNames, Groups; GroupMap* groupMap; + map uniqueToRedundant; //if a namefile is given and the first column name is not selected + //then the other files need to change the unique name in their file to match. + //only add the names that need to be changed to keep the map search quick + int readFasta(); int readShared(); diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index d5f2a6c..cedb74a 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -18,6 +18,7 @@ vector ScreenSeqsCommand::setParameters(){ CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup); CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile); CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(palignreport); + CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax); CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart); CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend); CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig); @@ -44,8 +45,9 @@ string ScreenSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The screen.seqs command reads a fastafile and creates .....\n"; - helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, qfile, optimize, criteria and processors.\n"; + helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, qfile, alignreport, taxonomy, optimize, criteria and processors.\n"; helpString += "The fasta parameter is required.\n"; + helpString += "The alignreport and taxonomy parameters allow you to remove bad seqs from taxonomy and alignreport files.\n"; helpString += "The start parameter .... The default is -1.\n"; helpString += "The end parameter .... The default is -1.\n"; helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n"; @@ -80,6 +82,7 @@ ScreenSeqsCommand::ScreenSeqsCommand(){ outputTypes["alignreport"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["qfile"] = tempOutNames; + outputTypes["taxonomy"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand"); @@ -118,6 +121,7 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option) { outputTypes["alignreport"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["qfile"] = tempOutNames; + outputTypes["taxonomy"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -164,6 +168,13 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option) { if (path == "") { parameters["qfile"] = inputDir + it->second; } } + it = parameters.find("taxonomy"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["taxonomy"] = inputDir + it->second; } + } } //check for required parameters @@ -193,7 +204,11 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option) { alignreport = validParameter.validFile(parameters, "alignreport", true); if (alignreport == "not open") { abort = true; } - else if (alignreport == "not found") { alignreport = ""; } + else if (alignreport == "not found") { alignreport = ""; } + + taxonomy = validParameter.validFile(parameters, "taxonomy", true); + if (taxonomy == "not open") { abort = true; } + else if (taxonomy == "not found") { taxonomy = ""; } //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ @@ -483,6 +498,7 @@ int ScreenSeqsCommand::execute(){ if(alignreport != "") { screenAlignReport(badSeqNames); } if(qualfile != "") { screenQual(badSeqNames); } + if(taxonomy != "") { screenTaxonomy(badSeqNames); } if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; } @@ -519,6 +535,11 @@ int ScreenSeqsCommand::execute(){ if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); } } + + itTypes = outputTypes.find("taxonomy"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); } + } m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); @@ -644,6 +665,16 @@ int ScreenSeqsCommand::getSummary(vector& positions){ lines.push_back(new linePair(positions[i], positions[(i+1)])); } + +#ifdef USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &pid); + + if (pid == 0) { //only one process should fix files + driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]); + } + + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait +#else int numSeqs = 0; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1){ @@ -657,7 +688,7 @@ int ScreenSeqsCommand::getSummary(vector& positions){ numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]); if (m->control_pressed) { return 0; } #endif - +#endif sort(startPosition.begin(), startPosition.end()); sort(endPosition.begin(), endPosition.end()); sort(seqLength.begin(), seqLength.end()); @@ -936,6 +967,56 @@ int ScreenSeqsCommand::screenAlignReport(set badSeqNames){ } //*************************************************************************************************************** +int ScreenSeqsCommand::screenTaxonomy(set badSeqNames){ + try { + ifstream input; + m->openInputFile(taxonomy, input); + string seqName, tax; + set::iterator it; + + string goodTaxFile = outputDir + m->getRootName(m->getSimpleName(taxonomy)) + "good" + m->getExtension(taxonomy); + outputNames.push_back(goodTaxFile); outputTypes["taxonomy"].push_back(goodTaxFile); + ofstream goodTaxOut; m->openOutputFile(goodTaxFile, goodTaxOut); + + while(!input.eof()){ + if (m->control_pressed) { goodTaxOut.close(); input.close(); m->mothurRemove(goodTaxFile); return 0; } + + input >> seqName >> tax; + it = badSeqNames.find(seqName); + + if(it != badSeqNames.end()){ badSeqNames.erase(it); } + else{ + goodTaxOut << seqName << '\t' << tax << endl; + } + m->gobble(input); + } + + if (m->control_pressed) { goodTaxOut.close(); input.close(); m->mothurRemove(goodTaxFile); return 0; } + + //we were unable to remove some of the bad sequences + if (badSeqNames.size() != 0) { + for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) { + m->mothurOut("Your taxonomy file does not include the sequence " + *it + " please correct."); + m->mothurOutEndLine(); + } + } + + input.close(); + goodTaxOut.close(); + + if (m->control_pressed) { m->mothurRemove(goodTaxFile); return 0; } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "ScreenSeqsCommand", "screenTaxonomy"); + exit(1); + } + +} +//*************************************************************************************************************** + int ScreenSeqsCommand::screenQual(set badSeqNames){ try { ifstream in; diff --git a/screenseqscommand.h b/screenseqscommand.h index 87c8bee..49d992a 100644 --- a/screenseqscommand.h +++ b/screenseqscommand.h @@ -45,6 +45,7 @@ private: int screenGroupFile(set); int screenAlignReport(set); int screenQual(set); + int screenTaxonomy(set); int driver(linePair*, string, string, string, set&); int createProcesses(string, string, string, set&); @@ -54,7 +55,7 @@ private: #endif bool abort; - string fastafile, namefile, groupfile, alignreport, outputDir, qualfile; + string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy; int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, criteria; vector outputNames; vector optimize; diff --git a/sequence.cpp b/sequence.cpp index 69e905e..162d3be 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -76,10 +76,15 @@ Sequence::Sequence(istringstream& fastaString){ while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there - sequence = getSequenceString(fastaString); + int numAmbig = 0; + sequence = getSequenceString(fastaString, numAmbig); + setAligned(sequence); //setUnaligned removes any gap characters for us - setUnaligned(sequence); + setUnaligned(sequence); + + if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); } + }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); } } @@ -118,10 +123,14 @@ Sequence::Sequence(istringstream& fastaString, string JustUnaligned){ while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there - sequence = getSequenceString(fastaString); + int numAmbig = 0; + sequence = getSequenceString(fastaString, numAmbig); //setUnaligned removes any gap characters for us - setUnaligned(sequence); + setUnaligned(sequence); + + if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); } + }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); } } @@ -163,11 +172,15 @@ Sequence::Sequence(ifstream& fastaFile){ //read real sequence while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there - sequence = getSequenceString(fastaFile); - + int numAmbig = 0; + sequence = getSequenceString(fastaFile, numAmbig); + setAligned(sequence); //setUnaligned removes any gap characters for us setUnaligned(sequence); + + if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); } + }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); } } @@ -205,10 +218,14 @@ Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){ //read real sequence while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there - sequence = getSequenceString(fastaFile); + int numAmbig = 0; + sequence = getSequenceString(fastaFile, numAmbig); //setUnaligned removes any gap characters for us setUnaligned(sequence); + + if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); } + }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); } } @@ -219,10 +236,11 @@ Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){ } //******************************************************************************************************************** -string Sequence::getSequenceString(ifstream& fastaFile) { +string Sequence::getSequenceString(ifstream& fastaFile, int& numAmbig) { try { char letter; string sequence = ""; + numAmbig = 0; while(fastaFile){ letter= fastaFile.get(); @@ -233,8 +251,9 @@ string Sequence::getSequenceString(ifstream& fastaFile) { else if(isprint(letter)){ letter = toupper(letter); if(letter == 'U'){letter = 'T';} - if(letter != '.' && letter != '-' && letter != 'A' && letter != 'T' && letter != 'G' && letter != 'C'){ + if(letter != '.' && letter != '-' && letter != 'A' && letter != 'T' && letter != 'G' && letter != 'C' && letter != 'N'){ letter = 'N'; + numAmbig++; } sequence += letter; } @@ -270,10 +289,11 @@ string Sequence::getCommentString(ifstream& fastaFile) { } } //******************************************************************************************************************** -string Sequence::getSequenceString(istringstream& fastaFile) { +string Sequence::getSequenceString(istringstream& fastaFile, int& numAmbig) { try { char letter; - string sequence = ""; + string sequence = ""; + numAmbig = 0; while(!fastaFile.eof()){ letter= fastaFile.get(); @@ -285,6 +305,10 @@ string Sequence::getSequenceString(istringstream& fastaFile) { else if(isprint(letter)){ letter = toupper(letter); if(letter == 'U'){letter = 'T';} + if(letter != '.' && letter != '-' && letter != 'A' && letter != 'T' && letter != 'G' && letter != 'C' && letter != 'N'){ + letter = 'N'; + numAmbig++; + } sequence += letter; } } diff --git a/sequence.hpp b/sequence.hpp index 224ae5d..6a50cb0 100644 --- a/sequence.hpp +++ b/sequence.hpp @@ -65,9 +65,9 @@ public: private: MothurOut* m; void initialize(); - string getSequenceString(ifstream&); + string getSequenceString(ifstream&, int&); string getCommentString(ifstream&); - string getSequenceString(istringstream&); + string getSequenceString(istringstream&, int&); string getCommentString(istringstream&); string name; string unaligned; diff --git a/shhhseqscommand.cpp b/shhhseqscommand.cpp index 625b939..8ef4c8c 100644 --- a/shhhseqscommand.cpp +++ b/shhhseqscommand.cpp @@ -196,13 +196,16 @@ int ShhhSeqsCommand::execute() { m->openOutputFile(nameFileName, out1); out1.close(); mapFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "shhh."; - if(processors == 1) { driverGroups(parser, outputFileName, nameFileName, mapFileName, 0, groups.size(), groups); } - else { createProcessesGroups(parser, outputFileName, nameFileName, mapFileName, groups); } + vector mapFileNames; + if(processors == 1) { mapFileNames = driverGroups(parser, outputFileName, nameFileName, mapFileName, 0, groups.size(), groups); } + else { mapFileNames = createProcessesGroups(parser, outputFileName, nameFileName, mapFileName, groups); } - if (m->control_pressed) { return 0; } + if (m->control_pressed) { return 0; } - //deconvolute results by running unique.seqs + for (int j = 0; j < mapFileNames.size(); j++) { outputNames.push_back(mapFileNames[j]); outputTypes["map"].push_back(mapFileNames[j]); } + //deconvolute results by running unique.seqs + deconvoluteResults(outputFileName, nameFileName); if (m->control_pressed) { return 0; } @@ -227,13 +230,13 @@ int ShhhSeqsCommand::execute() { if (m->control_pressed) { m->mothurRemove(distFileName); return 0; } driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, outputFileName, nameFileName, mapFileName); + outputNames.push_back(mapFileName); outputTypes["map"].push_back(mapFileName); } if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName); - outputNames.push_back(mapFileName); outputTypes["map"].push_back(mapFileName); m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); @@ -335,11 +338,12 @@ int ShhhSeqsCommand::loadData(correctDist* correct, seqNoise& noise, vector groups) { +vector ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFName, string newNName, string newMName, vector groups) { try { vector processIDS; int process = 1; + vector mapfileNames; //sanity check if (groups.size() < processors) { processors = groups.size(); } @@ -364,7 +368,18 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMName, lines[process].start, lines[process].end, groups); + mapfileNames = driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMName, lines[process].start, lines[process].end, groups); + + //pass filenames to parent + ofstream out; + string tempFile = newMName + toString(getpid()) + ".temp"; + m->openOutputFile(tempFile, out); + out << mapfileNames.size() << endl; + for (int i = 0; i < mapfileNames.size(); i++) { + out << mapfileNames[i] << endl; + } + out.close(); + exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); @@ -374,7 +389,7 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa } //do my part - driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups); + mapfileNames = driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups); //force parent to wait until all the processes are done for (int i=0;iopenInputFile(tempFile, in); + if (!in.eof()) { + int tempNum = 0; in >> tempNum; m->gobble(in); + for (int j = 0; j < tempNum; j++) { + string filename; + in >> filename; m->gobble(in); + mapfileNames.push_back(filename); + } + } + in.close(); m->mothurRemove(tempFile); + + } #else ////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -397,7 +428,7 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa for( int i=1; imapfileNames.size(); j++) { + mapfileNames.push_back(pDataArray[i]->mapfileNames[j]); + } CloseHandle(hThreadArray[i]); delete pDataArray[i]; } @@ -431,7 +465,7 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa m->mothurRemove((newNName + toString(processIDS[i]) + ".temp")); } - return 0; + return mapfileNames; } catch(exception& e) { @@ -440,14 +474,16 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa } } /**************************************************************************************************/ -int ShhhSeqsCommand::driverGroups(SequenceParser& parser, string newFFile, string newNFile, string newMFile, int start, int end, vector groups){ +vector ShhhSeqsCommand::driverGroups(SequenceParser& parser, string newFFile, string newNFile, string newMFile, int start, int end, vector groups){ try { + vector mapFileNames; + for (int i = start; i < end; i++) { start = time(NULL); - if (m->control_pressed) { return 0; } + if (m->control_pressed) { return mapFileNames; } m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[i] + ":"); m->mothurOutEndLine(); @@ -465,26 +501,27 @@ int ShhhSeqsCommand::driverGroups(SequenceParser& parser, string newFFile, strin //load this groups info in order loadData(correct, noise, sequences, uniqueNames, redundantNames, seqFreq, thisNameMap, thisSeqs); - if (m->control_pressed) { return 0; } + if (m->control_pressed) { return mapFileNames; } //calc distances for cluster string distFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + groups[i] + ".shhh.dist"; correct->execute(distFileName); delete correct; - if (m->control_pressed) { m->mothurRemove(distFileName); return 0; } + if (m->control_pressed) { m->mothurRemove(distFileName); return mapFileNames; } driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, newFFile+groups[i], newNFile+groups[i], newMFile+groups[i]+".map"); - if (m->control_pressed) { return 0; } + if (m->control_pressed) { return mapFileNames; } m->appendFiles(newFFile+groups[i], newFFile); m->mothurRemove(newFFile+groups[i]); m->appendFiles(newNFile+groups[i], newNFile); m->mothurRemove(newNFile+groups[i]); + mapFileNames.push_back(newMFile+groups[i]+".map"); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process group " + groups[i] + "."); m->mothurOutEndLine(); } - return 0; + return mapFileNames; } catch(exception& e) { m->errorOut(e, "ShhhSeqsCommand", "driverGroups"); @@ -666,7 +703,7 @@ int ShhhSeqsCommand::deconvoluteResults(string fastaFile, string nameFile){ string newfastaFile = filenames["fasta"][0]; m->mothurRemove(fastaFile); rename(newfastaFile.c_str(), fastaFile.c_str()); - m->mothurRemove(nameFile); rename(newnameFile.c_str(), nameFile.c_str()); + if (nameFile != newnameFile) { m->mothurRemove(nameFile); rename(newnameFile.c_str(), nameFile.c_str()); } return 0; } diff --git a/shhhseqscommand.h b/shhhseqscommand.h index 5aceca8..1b0211a 100644 --- a/shhhseqscommand.h +++ b/shhhseqscommand.h @@ -55,8 +55,8 @@ private: int loadData(correctDist*, seqNoise&, vector&, vector&, vector&, vector&, map&, vector&); int driver(seqNoise&, vector&, vector&, vector&, vector&, string, string, string, string); - int driverGroups(SequenceParser&, string, string, string, int, int, vector); - int createProcessesGroups(SequenceParser&, string, string, string, vector); + vector driverGroups(SequenceParser&, string, string, string, int, int, vector); + vector createProcessesGroups(SequenceParser&, string, string, string, vector); int deconvoluteResults(string, string); @@ -77,6 +77,7 @@ struct shhhseqsData { int end; int sigma, threadID; vector groups; + vector mapfileNames; shhhseqsData(){} shhhseqsData(string f, string n, string g, string nff, string nnf, string nmf, vector gr, MothurOut* mout, int st, int en, int s, int tid) { @@ -85,7 +86,7 @@ struct shhhseqsData { groupfile = g; newFName = nff; newNName = nnf; - newNName = nmf; + newMName = nmf; m = mout; start = st; end = en; @@ -115,7 +116,7 @@ static DWORD WINAPI MyShhhSeqsThreadFunction(LPVOID lpParam){ if (pDataArray->m->control_pressed) { return 0; } pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Processing group " + pDataArray->groups[k] + ":"); pDataArray->m->mothurOutEndLine(); - + map thisNameMap; thisNameMap = parser.getNameMap(pDataArray->groups[k]); vector thisSeqs = parser.getSeqs(pDataArray->groups[k]); @@ -311,10 +312,9 @@ static DWORD WINAPI MyShhhSeqsThreadFunction(LPVOID lpParam){ pDataArray->m->appendFiles(pDataArray->newFName+pDataArray->groups[k], pDataArray->newFName); pDataArray->m->mothurRemove(pDataArray->newFName+pDataArray->groups[k]); pDataArray->m->appendFiles(pDataArray->newNName+pDataArray->groups[k], pDataArray->newNName); pDataArray->m->mothurRemove(pDataArray->newNName+pDataArray->groups[k]); + pDataArray->mapfileNames.push_back(pDataArray->newMName+pDataArray->groups[k]+".map"); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process group " + pDataArray->groups[k] + "."); pDataArray->m->mothurOutEndLine(); - - } return 0; @@ -322,7 +322,7 @@ static DWORD WINAPI MyShhhSeqsThreadFunction(LPVOID lpParam){ } catch(exception& e) { - pDataArray->m->errorOut(e, "PreClusterCommand", "MyPreclusterThreadFunction"); + pDataArray->m->errorOut(e, "ShhhSeqsCommand", "MyShhhSeqsThreadFunction"); exit(1); } } diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index 7a1af62..8be4a6c 100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -17,8 +17,8 @@ vector TrimFlowsCommand::setParameters(){ CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pflow); CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos); CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "",false,false); parameters.push_back(pmaxhomop); - CommandParameter pmaxflows("maxflows", "Number", "", "720", "", "", "",false,false); parameters.push_back(pmaxflows); - CommandParameter pminflows("minflows", "Number", "", "360", "", "", "",false,false); parameters.push_back(pminflows); + CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pmaxflows); + CommandParameter pminflows("minflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pminflows); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs); CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs); -- 2.39.2