From: Sarah Westcott Date: Wed, 23 May 2012 18:33:47 +0000 (-0400) Subject: added design and sets parameters to the rarefaction.shared command. finished work... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=ffc44592ff7ae94f14f9e21f87198e33d323cd1d added design and sets parameters to the rarefaction.shared command. finished work on large parameter in shhh.flows. --- diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index 2c435ca..e2b9331 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -633,6 +633,7 @@ int ChimeraSlayerCommand::execute(){ #endif totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, trimFastaFileName); + m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine(); #ifdef USE_MPI } MPI_Barrier(MPI_COMM_WORLD); //make everyone wait @@ -641,7 +642,7 @@ int ChimeraSlayerCommand::execute(){ if (parser != NULL) { delete parser; } - m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); } //set accnos file as new current accnosfile diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index 0504e66..4e24490 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -467,7 +467,6 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { } } - } catch(exception& e) { m->errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand"); @@ -486,7 +485,7 @@ ClassifySeqsCommand::~ClassifySeqsCommand(){ int ClassifySeqsCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } - + if(method == "bayesian"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip); } else if(method == "knn"){ classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand()); } else { @@ -507,8 +506,8 @@ int ClassifySeqsCommand::execute(){ string RippedTaxName = m->getRootName(m->getSimpleName(baseTName)); RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1)); if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); } - RippedTaxName += "."; - + if (RippedTaxName != "") { RippedTaxName += "."; } + if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); } string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "taxonomy"; string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "flip.accnos"; diff --git a/groupmap.cpp b/groupmap.cpp index 481fd1d..92a43e9 100644 --- a/groupmap.cpp +++ b/groupmap.cpp @@ -73,7 +73,35 @@ int GroupMap::readDesignMap() { m->setAllGroups(namesOfGroups); return error; } - +/************************************************************/ +int GroupMap::readDesignMap(string filename) { + groupFileName = filename; + m->openInputFile(filename, fileHandle); + index = 0; + string seqName, seqGroup; + int error = 0; + + while(fileHandle){ + fileHandle >> seqName; m->gobble(fileHandle); //read from first column + fileHandle >> seqGroup; //read from second column + + if (m->control_pressed) { fileHandle.close(); return 1; } + + setNamesOfGroups(seqGroup); + + it = groupmap.find(seqName); + + if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 group named " + seqName + ", group names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[seqName] = seqGroup; //store data in map + seqsPerGroup[seqGroup]++; //increment number of seqs in that group + } + m->gobble(fileHandle); + } + fileHandle.close(); + m->setAllGroups(namesOfGroups); + return error; +} /************************************************************/ int GroupMap::getNumGroups() { return namesOfGroups.size(); } /************************************************************/ diff --git a/groupmap.h b/groupmap.h index 29c1741..567165d 100644 --- a/groupmap.h +++ b/groupmap.h @@ -17,11 +17,12 @@ class GroupMap { public: - GroupMap() {}; + GroupMap() { m = MothurOut::getInstance(); } GroupMap(string); ~GroupMap(); int readMap(); int readDesignMap(); + int readDesignMap(string); int getNumGroups(); bool isValidGroup(string); //return true if string is a valid group string getGroup(string); diff --git a/indicatorcommand.cpp b/indicatorcommand.cpp index 97f480e..8d1d7f7 100644 --- a/indicatorcommand.cpp +++ b/indicatorcommand.cpp @@ -240,7 +240,6 @@ int IndicatorCommand::execute(){ util.setGroups(Groups, nameGroups); designMap->setNamesOfGroups(nameGroups); - //loop through the Groups and fill Globaldata's Groups with the design file info vector namesSeqs = designMap->getNamesSeqs(Groups); m->setGroups(namesSeqs); } diff --git a/rarefactsharedcommand.cpp b/rarefactsharedcommand.cpp index 64bdbb4..726ddd6 100644 --- a/rarefactsharedcommand.cpp +++ b/rarefactsharedcommand.cpp @@ -10,17 +10,20 @@ #include "rarefactsharedcommand.h" #include "sharedsobs.h" #include "sharednseqs.h" +#include "sharedutilities.h" //********************************************************************************************************************** vector RareFactSharedCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared); + CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pdesign); CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq); CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "",true,false); parameters.push_back(pcalc); CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pjumble); CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); + CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -39,7 +42,9 @@ string RareFactSharedCommand::getHelpString(){ string helpString = ""; ValidCalculators validCalculator; helpString += "The collect.shared command parameters are shared, label, freq, calc and groups. shared is required if there is no current sharedfile. \n"; - helpString += "The rarefaction.shared command parameters are shared, label, iters, groups, jumble and calc. shared is required if there is no current sharedfile. \n"; + helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble and calc. shared is required if there is no current sharedfile. \n"; + helpString += "The design parameter allows you to assign your groups to sets. If provided mothur will run rarefaction.shared on a per set basis. \n"; + helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n"; helpString += "The rarefaction command should be in the following format: \n"; helpString += "rarefaction.shared(label=yourLabel, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n"; helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n"; @@ -114,6 +119,15 @@ RareFactSharedCommand::RareFactSharedCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["shared"] = inputDir + it->second; } } + + it = parameters.find("design"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["design"] = inputDir + it->second; } + } + } //get shared file @@ -125,6 +139,11 @@ RareFactSharedCommand::RareFactSharedCommand(string option) { if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; } }else { m->setSharedFile(sharedfile); } + + designfile = validParameter.validFile(parameters, "design", true); + if (designfile == "not open") { abort = true; designfile = ""; } + else if (designfile == "not found") { designfile = ""; } + else { m->setDesignFile(designfile); } //if the user changes the output directory command factory will send this info to us in the output parameter @@ -159,6 +178,12 @@ RareFactSharedCommand::RareFactSharedCommand(string option) { m->splitAtDash(groups, Groups); } m->setGroups(Groups); + + string sets = validParameter.validFile(parameters, "sets", false); + if (sets == "not found") { sets = ""; } + else { + m->splitAtDash(sets, Sets); + } string temp; temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } @@ -186,11 +211,73 @@ int RareFactSharedCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } - - string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile)); + + GroupMap designMap; + if (designfile == "") { //fake out designMap to run with process + process(designMap, ""); + }else { + designMap.readDesignMap(designfile); + + //fill Sets - checks for "all" and for any typo groups + SharedUtil util; + vector nameSets = designMap.getNamesOfGroups(); + util.setGroups(Sets, nameSets); + + for (int i = 0; i < Sets.size(); i++) { + process(designMap, Sets[i]); + } + } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "RareFactSharedCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** + +int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){ + try { + Rarefact* rCurve; + vector rDisplays; + + InputData input(sharedfile, "sharedfile"); + lookup = input.getSharedRAbundVectors(); + if (lookup.size() < 2) { + m->mothurOut("I cannot run the command without at least 2 valid groups."); + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + return 0; + } + + string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile)); + + vector newGroups = m->getGroups(); + if (thisSet != "") { //make groups only filled with groups from this set so that's all inputdata will read + vector thisSets; thisSets.push_back(thisSet); + newGroups = designMap.getNamesSeqs(thisSets); + fileNameRoot += thisSet + "."; + } + + vector subset; + if (thisSet == "") { subset.clear(); subset = lookup; } + else {//fill subset with this sets groups + subset.clear(); + for (int i = 0; i < lookup.size(); i++) { + if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) { + subset.push_back(lookup[i]); + } + } + } ValidCalculators validCalculator; - for (int i=0; igetSharedRAbundVectors(); - string lastLabel = lookup[0]->getLabel(); + if (rDisplays.size() == 0) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } if (m->control_pressed) { - m->clearGroups(); - delete input; for(int i=0;imothurRemove(outputNames[i]); } for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } - - - if (lookup.size() < 2) { - m->mothurOut("I cannot run the command without at least 2 valid groups."); - for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } - return 0; - } - + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + string lastLabel = subset[0]->getLabel(); set processedLabels; set userLabels = labels; - + //as long as you are not at the end of the file or done wih the lines you want - while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + while((subset[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->control_pressed) { - m->clearGroups(); - delete input; for(int i=0;imothurRemove(outputNames[i]); } for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } - if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ - m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - rCurve = new Rarefact(lookup, rDisplays); + if(allLines == 1 || labels.count(subset[0]->getLabel()) == 1){ + m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine(); + rCurve = new Rarefact(subset, rDisplays); rCurve->getSharedCurve(freq, nIters); delete rCurve; - - processedLabels.insert(lookup[0]->getLabel()); - userLabels.erase(lookup[0]->getLabel()); + + processedLabels.insert(subset[0]->getLabel()); + userLabels.erase(subset[0]->getLabel()); } - if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { - string saveLabel = lookup[0]->getLabel(); - - for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } - lookup = input->getSharedRAbundVectors(lastLabel); + if ((m->anyLabelsToProcess(subset[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = subset[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(lastLabel); + + if (thisSet == "") { subset.clear(); subset = lookup; } + else {//fill subset with this sets groups + subset.clear(); + for (int i = 0; i < lookup.size(); i++) { + if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) { + subset.push_back(lookup[i]); + } + } + } - m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - rCurve = new Rarefact(lookup, rDisplays); - rCurve->getSharedCurve(freq, nIters); - delete rCurve; - - processedLabels.insert(lookup[0]->getLabel()); - userLabels.erase(lookup[0]->getLabel()); - - //restore real lastlabel to save below - lookup[0]->setLabel(saveLabel); + m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine(); + rCurve = new Rarefact(subset, rDisplays); + rCurve->getSharedCurve(freq, nIters); + delete rCurve; + + processedLabels.insert(subset[0]->getLabel()); + userLabels.erase(subset[0]->getLabel()); + + //restore real lastlabel to save below + subset[0]->setLabel(saveLabel); } - + - lastLabel = lookup[0]->getLabel(); + lastLabel = subset[0]->getLabel(); //get next line to process for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } - lookup = input->getSharedRAbundVectors(); + lookup = input.getSharedRAbundVectors(); + + if (lookup[0] != NULL) { + if (thisSet == "") { subset.clear(); subset = lookup; } + else {//fill subset with this sets groups + subset.clear(); + for (int i = 0; i < lookup.size(); i++) { + if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) { + subset.push_back(lookup[i]); + } + } + } + }else { subset.clear(); subset.push_back(NULL); } + } if (m->control_pressed) { - m->clearGroups(); - delete input; for(int i=0;imothurRemove(outputNames[i]); } return 0; @@ -299,8 +394,6 @@ int RareFactSharedCommand::execute(){ } if (m->control_pressed) { - m->clearGroups(); - delete input; for(int i=0;imothurRemove(outputNames[i]); } return 0; @@ -309,33 +402,33 @@ int RareFactSharedCommand::execute(){ //run last label if you need to if (needToRun == true) { for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } - lookup = input->getSharedRAbundVectors(lastLabel); - - m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - rCurve = new Rarefact(lookup, rDisplays); + lookup = input.getSharedRAbundVectors(lastLabel); + + if (thisSet == "") { subset.clear(); subset = lookup; } + else {//fill subset with this sets groups + subset.clear(); + for (int i = 0; i < lookup.size(); i++) { + if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) { + subset.push_back(lookup[i]); + } + } + } + + m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine(); + rCurve = new Rarefact(subset, rDisplays); rCurve->getSharedCurve(freq, nIters); delete rCurve; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } for(int i=0;iclearGroups(); - delete input; - - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } - - m->mothurOutEndLine(); - m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } - m->mothurOutEndLine(); - return 0; - } + + return 0; + } catch(exception& e) { - m->errorOut(e, "RareFactSharedCommand", "execute"); + m->errorOut(e, "RareFactSharedCommand", "process"); exit(1); } } - - //********************************************************************************************************************** diff --git a/rarefactsharedcommand.h b/rarefactsharedcommand.h index 2112e8d..1a2d944 100644 --- a/rarefactsharedcommand.h +++ b/rarefactsharedcommand.h @@ -36,17 +36,16 @@ public: private: vector lookup; - InputData* input; - Rarefact* rCurve; - vector rDisplays; int nIters; string format; float freq; bool abort, allLines, jumble; set labels; //holds labels to be used - string label, calc, groups, outputDir, sharedfile; - vector Estimators, Groups, outputNames; + string label, calc, groups, outputDir, sharedfile, designfile; + vector Estimators, Groups, outputNames, Sets; + + int process(GroupMap&, string); }; diff --git a/shhhercommand.cpp b/shhhercommand.cpp index a09825e..5bcd7d8 100644 --- a/shhhercommand.cpp +++ b/shhhercommand.cpp @@ -2046,18 +2046,24 @@ vector ShhherCommand::parseFlowFiles(string filename){ out << thisNumFLows << endl; files.push_back(outputFileName); + int numLinesWrote = 0; for (int i = 0; i < largeSize; i++) { if (in.eof()) { break; } - string line = m->getline(in); + string line = m->getline(in); m->gobble(in); out << line << endl; + numLinesWrote++; } out.close(); + + if (numLinesWrote == 0) { m->mothurRemove(outputFileName); files.pop_back(); } count++; } in.close(); if (m->control_pressed) { for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); } files.clear(); } + m->mothurOut("\nDivided " + filename + " into " + toString(files.size()) + " files.\n\n"); + return files; } catch(exception& e) { @@ -2263,8 +2269,8 @@ int ShhherCommand::driver(vector filenames, string thisCompositeFASTAFil m->mothurRemove(otuCountsFileName); m->appendFiles(groupFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.groups")); m->mothurRemove(groupFileName); - m->mothurRemove(theseFlowFileNames[g]); } + m->mothurRemove(theseFlowFileNames[g]); } } @@ -3319,7 +3325,9 @@ void ShhherCommand::writeClusters(string otuCountsFileName, int numOTUs, int num //otuCountsFile << base; } } - otuCountsFile << newSeq.substr(4) << endl; + + if (newSeq.length() >= 4) { otuCountsFile << newSeq.substr(4) << endl; } + else { otuCountsFile << "NNNN" << endl; } } otuCountsFile << endl; }