From: westcott Date: Tue, 2 Jun 2009 16:02:18 +0000 (+0000) Subject: added smart distance feature and optimized all commands using line by line processing X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=a8f5a612bba54ceb74e17efc027d3a7f5aa93c9a added smart distance feature and optimized all commands using line by line processing --- diff --git a/binsequencecommand.cpp b/binsequencecommand.cpp index f1ba8b5..c217578 100644 --- a/binsequencecommand.cpp +++ b/binsequencecommand.cpp @@ -53,7 +53,7 @@ BinSeqCommand::~BinSeqCommand(){ int BinSeqCommand::execute(){ try { int count = 1; - string binnames, name, sequence; + int error = 0; //read fastafile fasta->readFastaFile(in); @@ -72,11 +72,117 @@ int BinSeqCommand::execute(){ input = globaldata->ginput; list = globaldata->gListVector; + ListVector* lastList = list; + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; + - while(list != NULL){ + while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){ + error = process(list, count); + if (error == 1) { return 0; } + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + + } + + if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastList->getLabel()) != 1)) { + + error = process(lastList, count); + if (error == 1) { return 0; } + + processedLabels.insert(lastList->getLabel()); + userLabels.erase(lastList->getLabel()); + + } + + if (count != 1) { delete lastList; } + lastList = list; + + list = input->getListVector(); + count++; + } + + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastList->getLabel()) != 1) { + cout << ". I will use " << lastList->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastList->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + error = process(lastList, count); + if (error == 1) { return 0; } + } + + delete lastList; + return 0; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the BinSeqCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//********************************************************************************************************************** +void BinSeqCommand::readNamesFile() { + try { + vector dupNames; + openInputFile(namesfile, inNames); + + string name, names, sequence; + + while(inNames){ + inNames >> name; //read from first column A + inNames >> names; //read from second column A,B,C,D + + dupNames.clear(); + + //parse names into vector + splitAtComma(names, dupNames); + + //store names in fasta map + sequence = fasta->getSequence(name); + for (int i = 0; i < dupNames.size(); i++) { + fasta->push_back(dupNames[i], sequence); + } + + gobble(inNames); + } + inNames.close(); + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the BinSeqCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +//********************************************************************************************************************** +//return 1 if error, 0 otherwise +int BinSeqCommand::process(ListVector* list, int count) { + try { + string binnames, name, sequence; string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".fasta"; openOutputFile(outputFileName, out); @@ -103,7 +209,7 @@ int BinSeqCommand::execute(){ if (group == "not found") { cout << name << " is missing from your group file. Please correct. " << endl; remove(outputFileName.c_str()); - return 0; + return 1; }else{ name = name + "|" + group + "|" + toString(i+1); out << ">" << name << endl; @@ -113,7 +219,7 @@ int BinSeqCommand::execute(){ }else { cout << name << " is missing from your fasta or name file. Please correct. " << endl; remove(outputFileName.c_str()); - return 0; + return 1; } } @@ -131,7 +237,7 @@ int BinSeqCommand::execute(){ if (group == "not found") { cout << binnames << " is missing from your group file. Please correct. " << endl; remove(outputFileName.c_str()); - return 0; + return 1; }else{ binnames = binnames + "|" + group + "|" + toString(i+1); out << ">" << binnames << endl; @@ -141,56 +247,12 @@ int BinSeqCommand::execute(){ }else { cout << binnames << " is missing from your fasta or name file. Please correct. " << endl; remove(outputFileName.c_str()); - return 0; + return 1; } - } + out.close(); - } - - delete list; - list = input->getListVector(); - count++; - } - - return 0; - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the BinSeqCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} - -//********************************************************************************************************************** -void BinSeqCommand::readNamesFile() { - try { - vector dupNames; - openInputFile(namesfile, inNames); - - string name, names, sequence; - - while(inNames){ - inNames >> name; //read from first column A - inNames >> names; //read from second column A,B,C,D - - dupNames.clear(); - - //parse names into vector - splitAtComma(names, dupNames); - - //store names in fasta map - sequence = fasta->getSequence(name); - for (int i = 0; i < dupNames.size(); i++) { - fasta->push_back(dupNames[i], sequence); - } - - gobble(inNames); - } - inNames.close(); + return 0; } catch(exception& e) { @@ -205,4 +267,3 @@ void BinSeqCommand::readNamesFile() { //********************************************************************************************************************** - diff --git a/binsequencecommand.h b/binsequencecommand.h index e2ff648..bc2f883 100644 --- a/binsequencecommand.h +++ b/binsequencecommand.h @@ -40,6 +40,7 @@ private: ifstream in, inNames; void readNamesFile(); + int process(ListVector*, int); }; #endif diff --git a/bootstrapsharedcommand.cpp b/bootstrapsharedcommand.cpp index 6b1f338..2835061 100644 --- a/bootstrapsharedcommand.cpp +++ b/bootstrapsharedcommand.cpp @@ -90,8 +90,6 @@ BootSharedCommand::~BootSharedCommand(){ int BootSharedCommand::execute(){ try { int count = 1; - EstOutput data; - vector subset; //if the users entered no valid calculators don't execute command if (treeCalculators.size() == 0) { return 0; } @@ -101,6 +99,11 @@ int BootSharedCommand::execute(){ read->read(&*globaldata); input = globaldata->ginput; order = input->getSharedOrderVector(); + SharedOrderVector* lastOrder = order; + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; //set users groups util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "treegroup"); @@ -117,69 +120,57 @@ int BootSharedCommand::execute(){ tmap->makeSim(globaldata->gGroupmap); globaldata->gTreemap = tmap; - while(order != NULL){ + while((order != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){ cout << order->getLabel() << '\t' << count << endl; + process(order); - //open an ostream for each calc to print to - for (int z = 0; z < treeCalculators.size(); z++) { - //create a new filename - outputFile = getRootName(globaldata->inputFileName) + treeCalculators[z]->getName() + ".boot" + order->getLabel() + ".tre"; - openOutputFile(outputFile, *(out[z])); - } - - //create a file for each calculator with the 1000 trees in it. - for (int p = 0; p < iters; p++) { - - util->getSharedVectorswithReplacement(globaldata->Groups, lookup, order); //fills group vectors from order vector. - - //for each calculator - for(int i = 0 ; i < treeCalculators.size(); i++) { - - //initialize simMatrix - simMatrix.clear(); - simMatrix.resize(numGroups); - for (int m = 0; m < simMatrix.size(); m++) { - for (int j = 0; j < simMatrix.size(); j++) { - simMatrix[m].push_back(0.0); - } - } - - //initialize index - index.clear(); - for (int g = 0; g < numGroups; g++) { index[g] = g; } - - for (int k = 0; k < lookup.size(); k++) { // pass cdd each set of groups to commpare - for (int l = k; l < lookup.size(); l++) { - if (k != l) { //we dont need to similiarity of a groups to itself - subset.clear(); //clear out old pair of sharedrabunds - //add new pair of sharedrabunds - subset.push_back(lookup[k]); subset.push_back(lookup[l]); - - //get estimated similarity between 2 groups - data = treeCalculators[i]->getValues(subset); //saves the calculator outputs - //save values in similarity matrix - simMatrix[k][l] = data[0]; - simMatrix[l][k] = data[0]; - } - } - } + processedLabels.insert(order->getLabel()); + userLabels.erase(order->getLabel()); - //creates tree from similarity matrix and write out file - createTree(out[i]); - } - } - //close ostream for each calc - for (int z = 0; z < treeCalculators.size(); z++) { out[z]->close(); } + //you have a label the user want that is smaller than this line and the last line has not already been processed } - + + if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastOrder->getLabel()) != 1)) { + + cout << lastOrder->getLabel() << '\t' << count << endl; + process(lastOrder); + + processedLabels.insert(lastOrder->getLabel()); + userLabels.erase(lastOrder->getLabel()); + } + + if (count != 1) { delete lastOrder; } + lastOrder = order; + //get next line to process order = input->getSharedOrderVector(); count++; } + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastOrder->getLabel()) != 1) { + cout << ". I will use " << lastOrder->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastOrder->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + process(lastOrder); + cout << lastOrder->getLabel() << '\t' << count << endl; + } + + delete lastOrder; + //reset groups parameter globaldata->Groups.clear(); globaldata->setGroups(""); @@ -293,5 +284,73 @@ void BootSharedCommand::printSims() { } } /***********************************************************/ +void BootSharedCommand::process(SharedOrderVector* order) { + try{ + EstOutput data; + vector subset; + + //open an ostream for each calc to print to + for (int z = 0; z < treeCalculators.size(); z++) { + //create a new filename + outputFile = getRootName(globaldata->inputFileName) + treeCalculators[z]->getName() + ".boot" + order->getLabel() + ".tre"; + openOutputFile(outputFile, *(out[z])); + } + + //create a file for each calculator with the 1000 trees in it. + for (int p = 0; p < iters; p++) { + + util->getSharedVectorswithReplacement(globaldata->Groups, lookup, order); //fills group vectors from order vector. + + //for each calculator + for(int i = 0 ; i < treeCalculators.size(); i++) { + + //initialize simMatrix + simMatrix.clear(); + simMatrix.resize(numGroups); + for (int m = 0; m < simMatrix.size(); m++) { + for (int j = 0; j < simMatrix.size(); j++) { + simMatrix[m].push_back(0.0); + } + } + + //initialize index + index.clear(); + for (int g = 0; g < numGroups; g++) { index[g] = g; } + + for (int k = 0; k < lookup.size(); k++) { // pass cdd each set of groups to commpare + for (int l = k; l < lookup.size(); l++) { + if (k != l) { //we dont need to similiarity of a groups to itself + subset.clear(); //clear out old pair of sharedrabunds + //add new pair of sharedrabunds + subset.push_back(lookup[k]); subset.push_back(lookup[l]); + + //get estimated similarity between 2 groups + data = treeCalculators[i]->getValues(subset); //saves the calculator outputs + //save values in similarity matrix + simMatrix[k][l] = data[0]; + simMatrix[l][k] = data[0]; + } + } + } + + //creates tree from similarity matrix and write out file + createTree(out[i]); + } + } + //close ostream for each calc + for (int z = 0; z < treeCalculators.size(); z++) { out[z]->close(); } + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the BootSharedCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the BootSharedCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/***********************************************************/ + diff --git a/bootstrapsharedcommand.h b/bootstrapsharedcommand.h index 322cd28..b632ace 100644 --- a/bootstrapsharedcommand.h +++ b/bootstrapsharedcommand.h @@ -12,7 +12,6 @@ #include "command.hpp" #include "sharedordervector.h" -#include "sharedlistvector.h" #include "inputdata.h" #include "groupmap.h" #include "readotu.h" @@ -33,6 +32,8 @@ public: private: void createTree(ostream*); void printSims(); + void process(SharedOrderVector*); + GlobalData* globaldata; SharedUtil* util; @@ -45,7 +46,6 @@ private: map index; //maps row in simMatrix to vector index in the tree InputData* input; ValidCalculators* validCalculator; - SharedListVector* SharedList; SharedOrderVector* order; vector lookup; string format, outputFile; diff --git a/collectcommand.cpp b/collectcommand.cpp index d317a06..01f2fff 100644 --- a/collectcommand.cpp +++ b/collectcommand.cpp @@ -36,8 +36,10 @@ CollectCommand::CollectCommand(){ globaldata = GlobalData::getInstance(); string fileNameRoot; fileNameRoot = getRootName(globaldata->inputFileName); + convert(globaldata->getFreq(), freq); int i; validCalculator = new ValidCalculators(); + for (i=0; iEstimators.size(); i++) { if (validCalculator->isValidCalculator("single", globaldata->Estimators[i]) == true) { if (globaldata->Estimators[i] == "sobs") { @@ -125,31 +127,68 @@ int CollectCommand::execute(){ read->read(&*globaldata); order = globaldata->gorder; + lastOrder = order; input = globaldata->ginput; - set orderList; - while(order != NULL){ + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; + + while((order != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { - orderList.insert(order->getLabel()); if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){ + cCurve = new Collect(order, cDisplays); - convert(globaldata->getFreq(), freq); cCurve->getCurve(freq); - delete cCurve; cout << order->getLabel() << '\t' << count << endl; + processedLabels.insert(order->getLabel()); + userLabels.erase(order->getLabel()); + + //you have a label the user want that is smaller than this line and the last line has not already been processed + } + + if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastOrder->getLabel()) != 1)) { + cCurve = new Collect(lastOrder, cDisplays); + cCurve->getCurve(freq); + delete cCurve; + + cout << lastOrder->getLabel() << '\t' << count << endl; + processedLabels.insert(lastOrder->getLabel()); + userLabels.erase(lastOrder->getLabel()); } - delete order; + if (count != 1) { delete lastOrder; } + lastOrder = order; order = (input->getOrderVector()); count++; } - set::iterator i; - for(i = globaldata->labels.begin(); i != globaldata->labels.end(); ++i) - if(orderList.count(*i) == 0) - cout << "'" << *i << "'" << " is not a valid label.\n"; - for(int i=0;i::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastOrder->getLabel()) != 1) { + cout << ". I will use " << lastOrder->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastOrder->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + cCurve = new Collect(lastOrder, cDisplays); + cCurve->getCurve(freq); + delete cCurve; + + cout << lastOrder->getLabel() << '\t' << count << endl; + } + + delete lastOrder; + for(int i=0;iinputFileName); format = globaldata->getFormat(); + convert(globaldata->getFreq(), freq); validCalculator = new ValidCalculators(); util = new SharedUtil(); @@ -134,39 +135,74 @@ int CollectSharedCommand::execute(){ input = globaldata->ginput; order = input->getSharedOrderVector(); + SharedOrderVector* lastOrder = order; - set orderList; - + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; + //set users groups util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "collect"); util->updateGroupIndex(globaldata->Groups, globaldata->gGroupmap->groupIndex); - while(order != NULL){ + while((order != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { - orderList.insert(order->getLabel()); - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){ //create collectors curve cCurve = new Collect(order, cDisplays); - convert(globaldata->getFreq(), freq); cCurve->getSharedCurve(freq); - delete cCurve; cout << order->getLabel() << '\t' << count << endl; + processedLabels.insert(order->getLabel()); + userLabels.erase(order->getLabel()); + + //you have a label the user want that is smaller than this line and the last line has not already been processed + } + + if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastOrder->getLabel()) != 1)) { + //create collectors curve + cCurve = new Collect(lastOrder, cDisplays); + cCurve->getSharedCurve(freq); + delete cCurve; + + cout << lastOrder->getLabel() << '\t' << count << endl; + processedLabels.insert(lastOrder->getLabel()); + userLabels.erase(lastOrder->getLabel()); } + if (count != 1) { delete lastOrder; } + lastOrder = order; + //get next line to process - delete order; order = input->getSharedOrderVector(); count++; } - set::iterator i; - for(i = globaldata->labels.begin(); i != globaldata->labels.end(); ++i) - if(orderList.count(*i) == 0) - cout << "'" << *i << "'" << " is not a valid label.\n"; - + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastOrder->getLabel()) != 1) { + cout << ". I will use " << lastOrder->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastOrder->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + cCurve = new Collect(lastOrder, cDisplays); + cCurve->getCurve(freq); + delete cCurve; + + cout << lastOrder->getLabel() << '\t' << count << endl; + } + + delete lastOrder; for(int i=0;ireadFastaFile(in); @@ -96,50 +94,66 @@ int GetOTURepCommand::execute(){ input = globaldata->ginput; list = globaldata->gListVector; + ListVector* lastList = list; - while(list != NULL){ + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; + + + while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){ - - //create output file - string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".rep.fasta"; - openOutputFile(outputFileName, out); - - cout << list->getLabel() << '\t' << count << endl; - - //for each bin in the list vector - for (int i = 0; i < list->size(); i++) { - string groups; - nameRep = FindRep(i, groups); + cout << list->getLabel() << '\t' << count << endl; + error = process(list); + if (error == 1) { return 0; } //there is an error in hte input files, abort command - //print out name and sequence for that bin - sequence = fasta->getSequence(nameRep); - - if (sequence != "not found") { - if (groupfile == "") { - nameRep = nameRep + "|" + toString(i+1); - out << ">" << nameRep << endl; - out << sequence << endl; - }else { - nameRep = nameRep + "|" + groups + "|" + toString(i+1); - out << ">" << nameRep << endl; - out << sequence << endl; - } - }else { - cout << nameRep << " is missing from your fasta or name file. Please correct. " << endl; - remove(outputFileName.c_str()); - return 0; - } - } - - out.close(); + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + } + + if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastList->getLabel()) != 1)) { + cout << lastList->getLabel() << '\t' << count << endl; + error = process(lastList); + if (error == 1) { return 0; } //there is an error in hte input files, abort command + + processedLabels.insert(lastList->getLabel()); + userLabels.erase(lastList->getLabel()); } + if (count != 1) { delete lastList; } + lastList = list; + list = input->getListVector(); count++; } - + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastList->getLabel()) != 1) { + cout << ". I will use " << lastList->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastList->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + cout << lastList->getLabel() << '\t' << count << endl; + error = process(lastList); + if (error == 1) { return 0; } //there is an error in hte input files, abort command + } + delete lastList; + + delete matrix; + globaldata->gSparseMatrix = NULL; + delete list; + globaldata->gListVector = NULL; + return 0; } catch(exception& e) { @@ -191,7 +205,7 @@ void GetOTURepCommand::readNamesFile() { } } //********************************************************************************************************************** -string GetOTURepCommand::FindRep(int bin, string& group) { +string GetOTURepCommand::FindRep(int bin, string& group, ListVector* thisList) { try{ vector names; map sums; @@ -203,7 +217,7 @@ string GetOTURepCommand::FindRep(int bin, string& group) { map groups; map::iterator groupIt; - binnames = list->get(bin); + binnames = thisList->get(bin); //parse names into vector splitAtComma(binnames, names); @@ -282,6 +296,56 @@ string GetOTURepCommand::FindRep(int bin, string& group) { } } +//********************************************************************************************************************** +int GetOTURepCommand::process(ListVector* processList) { + try{ + string nameRep, name, sequence; + + //create output file + string outputFileName = getRootName(globaldata->getListFile()) + processList->getLabel() + ".rep.fasta"; + openOutputFile(outputFileName, out); + + //for each bin in the list vector + for (int i = 0; i < processList->size(); i++) { + string groups; + nameRep = FindRep(i, groups, processList); + + //print out name and sequence for that bin + sequence = fasta->getSequence(nameRep); + + if (sequence != "not found") { + if (groupfile == "") { + nameRep = nameRep + "|" + toString(i+1); + out << ">" << nameRep << endl; + out << sequence << endl; + }else { + nameRep = nameRep + "|" + groups + "|" + toString(i+1); + out << ">" << nameRep << endl; + out << sequence << endl; + } + }else { + cout << nameRep << " is missing from your fasta or name file. Please correct. " << endl; + remove(outputFileName.c_str()); + return 1; + } + } + + out.close(); + return 0; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the GetOTURepCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//********************************************************************************************************************** + diff --git a/getoturepcommand.h b/getoturepcommand.h index deafc78..8153c36 100644 --- a/getoturepcommand.h +++ b/getoturepcommand.h @@ -52,7 +52,8 @@ private: map::iterator it3; void readNamesFile(); - string FindRep(int, string&); // returns name of "representative" sequence of given bin. //and fill a string containing the groups in that bin if a groupfile is given + int process(ListVector*); + string FindRep(int, string&, ListVector*); // returns name of "representative" sequence of given bin. //and fill a string containing the groups in that bin if a groupfile is given }; diff --git a/globaldata.cpp b/globaldata.cpp index 405e646..a8e88c5 100644 --- a/globaldata.cpp +++ b/globaldata.cpp @@ -97,21 +97,22 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "gapopen") { gapopen = value; } if (key == "gapextend" ) { gapextend = value; } - if (key == "line") {//stores lines to be used in a set + if (key == "line") {//stores lines to be used in a vector lines.clear(); labels.clear(); line = value; label = ""; - splitAtDash(value, lines); - allLines = 0; + if (line != "all") { splitAtDash(value, lines); allLines = 0; } + else { allLines = 1; } } - if (key == "label") {//stores labels to be used in a set + + if (key == "label") {//stores lines to be used in a vector labels.clear(); lines.clear(); label = value; line = ""; - splitAtDash(value, labels); - allLines = 0; + if (label != "all") { splitAtDash(value, labels); allLines = 0; } + else { allLines = 1; } } if (key == "groups") {//stores groups to be used in a vector diff --git a/heatmapcommand.cpp b/heatmapcommand.cpp index 09dfb5a..c9f6ae5 100644 --- a/heatmapcommand.cpp +++ b/heatmapcommand.cpp @@ -40,6 +40,8 @@ HeatMapCommand::~HeatMapCommand(){ int HeatMapCommand::execute(){ try { int count = 1; + RAbundVector* lastRAbund; + vector lastLookup; if (format == "sharedfile") { //you have groups @@ -48,50 +50,126 @@ int HeatMapCommand::execute(){ input = globaldata->ginput; lookup = input->getSharedRAbundVectors(); + lastLookup = lookup; }else if (format == "list") { //you are using just a list file and have only one group read = new ReadOTUFile(globaldata->inputFileName); read->read(&*globaldata); rabund = globaldata->rabund; + lastRAbund = globaldata->rabund; input = globaldata->ginput; } + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; + if (format != "list") { - while(lookup[0] != NULL){ + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){ cout << lookup[0]->getLabel() << '\t' << count << endl; heatmap->getPic(lookup); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); } - //prevent memory leak - for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) { + cout << lastLookup[0]->getLabel() << '\t' << count << endl; + heatmap->getPic(lastLookup); + + processedLabels.insert(lastLookup[0]->getLabel()); + userLabels.erase(lastLookup[0]->getLabel()); + } + //prevent memory leak + if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } } + lastLookup = lookup; + //get next line to process lookup = input->getSharedRAbundVectors(); count++; } + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastLookup[0]->getLabel()) != 1) { + cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + cout << lastLookup[0]->getLabel() << '\t' << count << endl; + heatmap->getPic(lastLookup); + } + + for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } + //reset groups parameter globaldata->Groups.clear(); }else{ - while(rabund != NULL){ - + while((rabund != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { + if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(rabund->getLabel()) == 1){ cout << rabund->getLabel() << '\t' << count << endl; heatmap->getPic(rabund); + + processedLabels.insert(rabund->getLabel()); + userLabels.erase(rabund->getLabel()); } - delete rabund; + if ((anyLabelsToProcess(rabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastRAbund->getLabel()) != 1)) { + + cout << lastRAbund->getLabel() << '\t' << count << endl; + heatmap->getPic(lastRAbund); + + processedLabels.insert(lastRAbund->getLabel()); + userLabels.erase(lastRAbund->getLabel()); + } + + if (count != 1) { delete lastRAbund; } + lastRAbund = rabund; + rabund = input->getRAbundVector(); count++; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastRAbund->getLabel()) != 1) { + cout << ". I will use " << lastRAbund->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastRAbund->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + cout << lastRAbund->getLabel() << '\t' << count << endl; + heatmap->getPic(lastRAbund); + } + + delete lastRAbund; + } globaldata->setGroups(""); diff --git a/matrixoutputcommand.cpp b/matrixoutputcommand.cpp index c792cc9..3ecf6df 100644 --- a/matrixoutputcommand.cpp +++ b/matrixoutputcommand.cpp @@ -79,9 +79,7 @@ MatrixOutputCommand::~MatrixOutputCommand(){ int MatrixOutputCommand::execute(){ try { int count = 1; - EstOutput data; - vector subset; - + //if the users entered no valid calculators don't execute command if (matrixCalculators.size() == 0) { cout << "No valid calculators." << endl; return 0; } @@ -91,62 +89,66 @@ int MatrixOutputCommand::execute(){ input = globaldata->ginput; lookup = input->getSharedRAbundVectors(); + vector lastLookup = lookup; + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; return 0;} numGroups = globaldata->Groups.size(); - while(lookup[0] != NULL){ + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){ - cout << lookup[0]->getLabel() << '\t' << count << endl; + process(lookup); - //for each calculator - for(int i = 0 ; i < matrixCalculators.size(); i++) { - - //initialize simMatrix - simMatrix.clear(); - simMatrix.resize(numGroups); - for (int m = 0; m < simMatrix.size(); m++) { - for (int j = 0; j < simMatrix.size(); j++) { - simMatrix[m].push_back(0.0); - } - } - - for (int k = 0; k < lookup.size(); k++) { - for (int l = k; l < lookup.size(); l++) { - if (k != l) { //we dont need to similiarity of a groups to itself - //get estimated similarity between 2 groups - - subset.clear(); //clear out old pair of sharedrabunds - //add new pair of sharedrabunds - subset.push_back(lookup[k]); subset.push_back(lookup[l]); - - data = matrixCalculators[i]->getValues(subset); //saves the calculator outputs - //save values in similarity matrix - simMatrix[k][l] = 1.0 - data[0]; //convert similiarity to distance - simMatrix[l][k] = 1.0 - data[0]; //convert similiarity to distance - } - } - } - - exportFileName = getRootName(globaldata->inputFileName) + matrixCalculators[i]->getName() + "." + lookup[0]->getLabel() + ".dist"; - openOutputFile(exportFileName, out); - printSims(out); - out.close(); - - } + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); } + if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) { + cout << lastLookup[0]->getLabel() << '\t' << count << endl; + process(lastLookup); + + processedLabels.insert(lastLookup[0]->getLabel()); + userLabels.erase(lastLookup[0]->getLabel()); + } + //prevent memory leak - for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } } + lastLookup = lookup; //get next line to process lookup = input->getSharedRAbundVectors(); count++; } + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastLookup[0]->getLabel()) != 1) { + cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + cout << lastLookup[0]->getLabel() << '\t' << count << endl; + process(lastLookup); + } + + for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } + + //reset groups parameter globaldata->Groups.clear(); globaldata->setGroups(""); @@ -187,5 +189,61 @@ void MatrixOutputCommand::printSims(ostream& out) { } } /***********************************************************/ +void MatrixOutputCommand::process(vector thisLookup){ + try { + + EstOutput data; + vector subset; + + //for each calculator + for(int i = 0 ; i < matrixCalculators.size(); i++) { + + //initialize simMatrix + simMatrix.clear(); + simMatrix.resize(numGroups); + for (int m = 0; m < simMatrix.size(); m++) { + for (int j = 0; j < simMatrix.size(); j++) { + simMatrix[m].push_back(0.0); + } + } + + for (int k = 0; k < thisLookup.size(); k++) { + for (int l = k; l < thisLookup.size(); l++) { + if (k != l) { //we dont need to similiarity of a groups to itself + //get estimated similarity between 2 groups + + subset.clear(); //clear out old pair of sharedrabunds + //add new pair of sharedrabunds + subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); + + data = matrixCalculators[i]->getValues(subset); //saves the calculator outputs + //save values in similarity matrix + simMatrix[k][l] = 1.0 - data[0]; //convert similiarity to distance + simMatrix[l][k] = 1.0 - data[0]; //convert similiarity to distance + } + } + } + + exportFileName = getRootName(globaldata->inputFileName) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist"; + openOutputFile(exportFileName, out); + printSims(out); + out.close(); + + } + + + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the MatrixOutputCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/***********************************************************/ + diff --git a/matrixoutputcommand.h b/matrixoutputcommand.h index 04d0ccb..547b567 100644 --- a/matrixoutputcommand.h +++ b/matrixoutputcommand.h @@ -30,6 +30,7 @@ public: private: void printSims(ostream&); + void process(vector); GlobalData* globaldata; ReadOTUFile* read; @@ -41,6 +42,7 @@ private: string exportFileName; int numGroups; ofstream out; + }; diff --git a/mothur.h b/mothur.h index 2c9595e..07bb971 100644 --- a/mothur.h +++ b/mothur.h @@ -73,6 +73,19 @@ inline void convert(const string& s, T& x, bool failIfLeftoverChars = true){ } //********************************************************************************************************************** +template +inline bool convertTestFloat(const string& s, T& x, bool failIfLeftoverChars = true){ + istringstream i(s); + char c; + if (!(i >> x) || (failIfLeftoverChars && i.get(c))) + { + return false; + } + return true; +} + +//********************************************************************************************************************** + template inline bool convertTest(const string& s, T& x, bool failIfLeftoverChars = true){ istringstream i(s); @@ -465,8 +478,87 @@ inline bool inUsersGroups(string groupname, vector Groups) { } } -/**************************************************************************************************/ +/***********************************************************************/ +//this function determines if the user has given us labels that are smaller than the given label. +//if so then it returns true so that the calling function can run the previous valid distance. +//it's a "smart" distance function. It also checks for invalid labels. +inline bool anyLabelsToProcess(string label, set& userLabels, string errorOff) { + try { + set::iterator it; + vector orderFloat; + map userMap; //the conversion process removes trailing 0's which we need to put back + map::iterator it2; + float labelFloat; + bool smaller = false; + + //unique is the smallest line + if (label == "unique") { return false; } + else { convert(label, labelFloat); } + + //go through users set and make them floats + for(it = userLabels.begin(); it != userLabels.end(); ++it) { + + float temp; + if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){ + convert(*it, temp); + orderFloat.push_back(temp); + userMap[*it] = temp; + }else if (*it == "unique") { + orderFloat.push_back(-1.0); + userMap["unique"] = -1.0; + }else { + if (errorOff == "") { cout << *it << " is not a valid label." << endl; } + userLabels.erase(*it); + it--; + } + } + + //sort order + sort(orderFloat.begin(), orderFloat.end()); + + /*************************************************/ + //is this label bigger than any of the users labels + /*************************************************/ + + //loop through order until you find a label greater than label + for (int i = 0; i < orderFloat.size(); i++) { + if (orderFloat[i] < labelFloat) { + smaller = true; + if (orderFloat[i] == -1) { + if (errorOff == "") { cout << "Your file does not include the label unique." << endl; } + userLabels.erase("unique"); + } + else { + if (errorOff == "") { cout << "Your file does not include the label "; } + string s = ""; + for (it2 = userMap.begin(); it2!= userMap.end(); it2++) { + if (it2->second == orderFloat[i]) { + s = it2->first; + //remove small labels + userLabels.erase(s); + break; + } + } + if (errorOff == "") { cout << s << ". I will use the next smallest distance. " << endl; } + } + //since they are sorted once you find a bigger one stop looking + }else { break; } + } + + return smaller; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the mothur class Function anyLabelsToProcess. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the mothur class function anyLabelsToProcess. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/**************************************************************************************************/ #endif diff --git a/parselistcommand.cpp b/parselistcommand.cpp index f144798..20f92f7 100644 --- a/parselistcommand.cpp +++ b/parselistcommand.cpp @@ -17,7 +17,8 @@ ParseListCommand::ParseListCommand(){ //read in group map info. groupMap = new GroupMap(globaldata->getGroupFile()); groupMap->readMap(); - + + //fill filehandles with neccessary ofstreams int i; ofstream* temp; @@ -44,7 +45,7 @@ ParseListCommand::ParseListCommand(){ } } /***********************************************************************/ -void ParseListCommand::parse(int index) { +void ParseListCommand::parse(int index, SharedListVector* list) { try { string prefix, suffix, groupsName; suffix = list->get(index); @@ -93,49 +94,71 @@ int ParseListCommand::execute(){ read->read(&*globaldata); input = globaldata->ginput; list = globaldata->gSharedList; + SharedListVector* lastList = list; + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; //read in group map info. groupMap = new GroupMap(globaldata->getGroupFile()); groupMap->readMap(); - string seq, label; - int i; //create new list vectors to fill with parsed data - for (i=0; igetNumGroups(); i++) { + for (int i=0; igetNumGroups(); i++) { groupOfLists[groupMap->namesOfGroups[i]] = new SharedListVector(); } + //parses and sets each groups listvector - while(list != NULL){ - label = list->getLabel(); - - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(label) == 1){ - - for(i=0; isize(); i++) { - parse(i); //parses data[i] list of sequence names - for (it=listGroups.begin(); it != listGroups.end(); it++) { //loop through map and set new list vectors - seq = it->second; - seq = seq.substr(1, seq.length()); //rips off extra comma - groupOfLists[it->first]->push_back(seq); //sets new listvector for each group - } - listGroups.clear(); - } - //prints each new list file - for (i=0; igetNumGroups(); i++) { - groupOfLists[groupMap->namesOfGroups[i]]->setLabel(label); - groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]])); - groupOfLists[groupMap->namesOfGroups[i]]->clear(); - } + //as long as you are not at the end of the file or done wih the lines you want + while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { + + if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){ + cout << list->getLabel() << '\t' << count << endl; + process(list); - cout << label << '\t' << count << endl; + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); } + if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastList->getLabel()) != 1)) { + cout << lastList->getLabel() << '\t' << count << endl; + process(lastList); + + processedLabels.insert(lastList->getLabel()); + userLabels.erase(lastList->getLabel()); + } + + if (count != 1) { delete lastList; } + lastList = list; + list = input->getSharedListVector(); count++; } + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastList->getLabel()) != 1) { + cout << ". I will use " << lastList->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastList->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + cout << lastList->getLabel() << '\t' << count << endl; + process(lastList); + } + delete lastList; + //set groupmap for .shared commands - //if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; } + if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; } globaldata->gGroupmap = groupMap; return 0; @@ -158,3 +181,33 @@ ParseListCommand::~ParseListCommand(){ delete read; } //********************************************************************************************************************** +void ParseListCommand::process(SharedListVector* thisList) { + try { + string seq; + + for(int i=0; isize(); i++) { + parse(i, thisList); //parses data[i] list of sequence names + for (it=listGroups.begin(); it != listGroups.end(); it++) { //loop through map and set new list vectors + seq = it->second; + seq = seq.substr(1, seq.length()); //rips off extra comma + groupOfLists[it->first]->push_back(seq); //sets new listvector for each group + } + listGroups.clear(); + } + //prints each new list file + for (int i=0; igetNumGroups(); i++) { + groupOfLists[groupMap->namesOfGroups[i]]->setLabel(thisList->getLabel()); + groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]])); + groupOfLists[groupMap->namesOfGroups[i]]->clear(); + } + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ParseListCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ParseListCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} \ No newline at end of file diff --git a/parselistcommand.h b/parselistcommand.h index 0477acd..4aa4ccd 100644 --- a/parselistcommand.h +++ b/parselistcommand.h @@ -43,9 +43,9 @@ private: SharedListVector* list; map listGroups; //maps group name to sequences from that group in a specific OTU map::iterator it; - map::iterator it2; map::iterator it3; - void parse(int); + void parse(int, SharedListVector*); + void process(SharedListVector*); string fileroot; }; diff --git a/rarefactcommand.cpp b/rarefactcommand.cpp index 981f077..752ff62 100644 --- a/rarefactcommand.cpp +++ b/rarefactcommand.cpp @@ -27,6 +27,8 @@ RareFactCommand::RareFactCommand(){ globaldata = GlobalData::getInstance(); string fileNameRoot; fileNameRoot = getRootName(globaldata->inputFileName); + convert(globaldata->getFreq(), freq); + convert(globaldata->getIters(), nIters); validCalculator = new ValidCalculators(); int i; @@ -96,28 +98,68 @@ int RareFactCommand::execute(){ read->read(&*globaldata); order = globaldata->gorder; + OrderVector* lastOrder = order; input = globaldata->ginput; - - while(order != NULL){ + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((order != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { + if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){ rCurve = new Rarefact(order, rDisplays); - convert(globaldata->getFreq(), freq); - convert(globaldata->getIters(), nIters); rCurve->getCurve(freq, nIters); - delete rCurve; cout << order->getLabel() << '\t' << count << endl; + processedLabels.insert(order->getLabel()); + userLabels.erase(order->getLabel()); + } + + if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastOrder->getLabel()) != 1)) { + rCurve = new Rarefact(lastOrder, rDisplays); + rCurve->getCurve(freq, nIters); + delete rCurve; + + cout << lastOrder->getLabel() << '\t' << count << endl; + processedLabels.insert(lastOrder->getLabel()); + userLabels.erase(lastOrder->getLabel()); } - delete order; + if (count != 1) { delete lastOrder; } + lastOrder = order; + order = (input->getOrderVector()); count++; + } + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastOrder->getLabel()) != 1) { + cout << ". I will use " << lastOrder->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastOrder->getLabel() << "." << endl; + } } - + + //run last line if you need to + if (needToRun == true) { + rCurve = new Rarefact(lastOrder, rDisplays); + rCurve->getCurve(freq, nIters); + delete rCurve; + + cout << lastOrder->getLabel() << '\t' << count << endl; + } + + delete lastOrder; + for(int i=0;iinputFileName); format = globaldata->getFormat(); + convert(globaldata->getFreq(), freq); + convert(globaldata->getIters(), nIters); validCalculator = new ValidCalculators(); int i; @@ -70,36 +72,74 @@ int RareFactSharedCommand::execute(){ input = globaldata->ginput; lookup = input->getSharedRAbundVectors(); + vector lastLookup = lookup; if (lookup.size() < 2) { cout << "I cannot run the command without at least 2 valid groups."; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } - - - while(lookup[0] != NULL){ + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { + if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){ - //create collectors curve + rCurve = new Rarefact(lookup, rDisplays); - convert(globaldata->getFreq(), freq); - convert(globaldata->getIters(), nIters); rCurve->getSharedCurve(freq, nIters); - delete rCurve; cout << lookup[0]->getLabel() << '\t' << count << endl; + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); } - //prevent memory leak - for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) { + cout << lastLookup[0]->getLabel() << '\t' << count << endl; + rCurve = new Rarefact(lastLookup, rDisplays); + rCurve->getSharedCurve(freq, nIters); + delete rCurve; + + processedLabels.insert(lastLookup[0]->getLabel()); + userLabels.erase(lastLookup[0]->getLabel()); + } + //prevent memory leak + if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } } + lastLookup = lookup; + //get next line to process lookup = input->getSharedRAbundVectors(); count++; } - + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastLookup[0]->getLabel()) != 1) { + cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + cout << lastLookup[0]->getLabel() << '\t' << count << endl; + rCurve = new Rarefact(lastLookup, rDisplays); + rCurve->getSharedCurve(freq, nIters); + delete rCurve; + } + + for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } + for(int i=0;igetLabel(); diff --git a/shared.h b/shared.h index 7445b5a..a3f55d5 100644 --- a/shared.h +++ b/shared.h @@ -20,8 +20,8 @@ using namespace std; class Shared { public: Shared(); - ~Shared(); - void getSharedVectors(int, SharedListVector*); + ~Shared() {}; + void getSharedVectors(SharedListVector*); map sharedGroups; //string is groupname, SharedVector* is out info for that group private: diff --git a/sharedcommand.cpp b/sharedcommand.cpp index c29d549..b4de07b 100644 --- a/sharedcommand.cpp +++ b/sharedcommand.cpp @@ -36,26 +36,70 @@ SharedCommand::SharedCommand(){ int SharedCommand::execute(){ try { globaldata = GlobalData::getInstance(); + int count = 1; + string errorOff = "no error"; //read in listfile read = new ReadOTUFile(globaldata->inputFileName); read->read(&*globaldata); input = globaldata->ginput; SharedList = globaldata->gSharedList; + SharedListVector* lastList = SharedList; + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; shared = new Shared(); - int i = 0; - while(SharedList != NULL){ - if(globaldata->allLines == 1 || globaldata->lines.count(i+1) == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){ + while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { + + + if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){ - shared->getSharedVectors(i, SharedList); //fills sharedGroups with new info and updates sharedVector - printSharedData(); //prints info to the .shared file + shared->getSharedVectors(SharedList); //fills sharedGroups with new info and updates sharedVector + printSharedData(); //prints info to the .shared file + + processedLabels.insert(SharedList->getLabel()); + userLabels.erase(SharedList->getLabel()); } + if ((anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastList->getLabel()) != 1)) { + shared->getSharedVectors(lastList); //fills sharedGroups with new info and updates sharedVector + printSharedData(); //prints info to the .shared file + + processedLabels.insert(lastList->getLabel()); + userLabels.erase(lastList->getLabel()); + } + + if (count != 1) { delete lastList; } + lastList = SharedList; + SharedList = input->getSharedListVector(); //get new list vector to process - i++; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + //cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastList->getLabel()) != 1) { + //cout << ". I will use " << lastList->getLabel() << "." << endl; + needToRun = true; + }else { + //cout << ". Please refer to " << lastList->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + shared->getSharedVectors(lastList); //fills sharedGroups with new info and updates sharedVector + printSharedData(); //prints info to the .shared file + } + + delete lastList; + delete shared; + return 0; } catch(exception& e) { @@ -93,6 +137,7 @@ void SharedCommand::printSharedData() { SharedCommand::~SharedCommand(){ //delete list; delete read; + } //********************************************************************************************************************** diff --git a/sharedrabundvector.cpp b/sharedrabundvector.cpp index 386017d..ca24b91 100644 --- a/sharedrabundvector.cpp +++ b/sharedrabundvector.cpp @@ -153,12 +153,6 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), /***********************************************************************/ -SharedRAbundVector::~SharedRAbundVector() { - -} - -/***********************************************************************/ - void SharedRAbundVector::set(int binNumber, int newBinSize, string groupname){ try { int oldBinSize = data[binNumber].abundance; diff --git a/sharedrabundvector.h b/sharedrabundvector.h index 0c71394..8944a26 100644 --- a/sharedrabundvector.h +++ b/sharedrabundvector.h @@ -32,7 +32,7 @@ public: //SharedRAbundVector(string, vector); SharedRAbundVector(const SharedRAbundVector& bv) : DataVector(bv), data(bv.data), maxRank(bv.maxRank), numBins(bv.numBins), numSeqs(bv.numSeqs){}; SharedRAbundVector(ifstream&); - ~SharedRAbundVector(); + ~SharedRAbundVector(){}; int getNumBins(); int getNumSeqs(); diff --git a/summarycommand.cpp b/summarycommand.cpp index b649582..e7fda2e 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -115,7 +115,8 @@ SummaryCommand::~SummaryCommand(){ int SummaryCommand::execute(){ try { - + int count = 1; + //if the users entered no valid calculators don't execute command if (sumCalculators.size() == 0) { return 0; } @@ -125,7 +126,11 @@ int SummaryCommand::execute(){ read = new ReadOTUFile(globaldata->inputFileName); read->read(&*globaldata); - + + sabund = globaldata->sabund; + SAbundVector* lastSAbund = sabund; + input = globaldata->ginput; + for(int i=0;igetCols() == 1){ outputFileHandle << '\t' << sumCalculators[i]->getName(); @@ -136,14 +141,18 @@ int SummaryCommand::execute(){ } outputFileHandle << endl; - sabund = globaldata->sabund; - input = globaldata->ginput; - int count = 1; - while(sabund != NULL){ + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; + while((sabund != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { + if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(sabund->getLabel()) == 1){ cout << sabund->getLabel() << '\t' << count << endl; + processedLabels.insert(sabund->getLabel()); + userLabels.erase(sabund->getLabel()); + outputFileHandle << sabund->getLabel(); for(int i=0;iprint(outputFileHandle); } - outputFileHandle << endl; } + + if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastSAbund->getLabel()) != 1)) { + + cout << lastSAbund->getLabel() << '\t' << count << endl; + processedLabels.insert(lastSAbund->getLabel()); + userLabels.erase(lastSAbund->getLabel()); + + outputFileHandle << lastSAbund->getLabel(); + for(int i=0;i data = sumCalculators[i]->getValues(lastSAbund); + outputFileHandle << '\t'; + sumCalculators[i]->print(outputFileHandle); + } + outputFileHandle << endl; + } + + if (count != 1) { delete lastSAbund; } + lastSAbund = sabund; + sabund = input->getSAbundVector(); count++; } - + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastSAbund->getLabel()) != 1) { + cout << ". I will use " << lastSAbund->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastSAbund->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + cout << lastSAbund->getLabel() << '\t' << count << endl; + outputFileHandle << lastSAbund->getLabel(); + for(int i=0;i data = sumCalculators[i]->getValues(lastSAbund); + outputFileHandle << '\t'; + sumCalculators[i]->print(outputFileHandle); + } + outputFileHandle << endl; + } + + delete lastSAbund; return 0; } catch(exception& e) { diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index d1d1be0..fafc290 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -133,6 +133,7 @@ int SummarySharedCommand::execute(){ input = globaldata->ginput; lookup = input->getSharedRAbundVectors(); + vector lastLookup = lookup; //output estimator names as column headers outputFileHandle << "label" <<'\t' << "comparison" << '\t'; @@ -165,29 +166,98 @@ int SummarySharedCommand::execute(){ return 0; } + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; - while(lookup[0] != NULL){ + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){ - cout << lookup[0]->getLabel() << '\t' << count << endl; + process(lookup); + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + } + + if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) { + cout << lastLookup[0]->getLabel() << '\t' << count << endl; + process(lastLookup); + + processedLabels.insert(lastLookup[0]->getLabel()); + userLabels.erase(lastLookup[0]->getLabel()); + } + + + //prevent memory leak + if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } } + lastLookup = lookup; + + //get next line to process + lookup = input->getSharedRAbundVectors(); + count++; + } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastLookup[0]->getLabel()) != 1) { + cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + cout << lastLookup[0]->getLabel() << '\t' << count << endl; + process(lastLookup); + } + + for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } + + //reset groups parameter + globaldata->Groups.clear(); globaldata->setGroups(""); + + //close files + outputFileHandle.close(); + if (mult == true) { outAll.close(); } + + return 0; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SummarySharedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SummarySharedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************/ +void SummarySharedCommand::process(vector thisLookup) { + try { //loop through calculators and add to file all for all calcs that can do mutiple groups if (mult == true) { //output label - outAll << lookup[0]->getLabel() << '\t'; + outAll << thisLookup[0]->getLabel() << '\t'; //output groups names string outNames = ""; - for (int j = 0; j < lookup.size(); j++) { - outNames += lookup[j]->getGroup() + "-"; + for (int j = 0; j < thisLookup.size(); j++) { + outNames += thisLookup[j]->getGroup() + "-"; } outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-'; outAll << outNames << '\t'; for(int i=0;igetMultiple() == true) { - sumCalculators[i]->getValues(lookup); + sumCalculators[i]->getValues(thisLookup); outAll << '\t'; sumCalculators[i]->print(outAll); } @@ -197,20 +267,20 @@ int SummarySharedCommand::execute(){ int n = 1; vector subset; - for (int k = 0; k < (lookup.size() - 1); k++) { // pass cdd each set of groups to commpare - for (int l = n; l < lookup.size(); l++) { + for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to commpare + for (int l = n; l < thisLookup.size(); l++) { - outputFileHandle << lookup[0]->getLabel() << '\t'; + outputFileHandle << thisLookup[0]->getLabel() << '\t'; subset.clear(); //clear out old pair of sharedrabunds //add new pair of sharedrabunds - subset.push_back(lookup[k]); subset.push_back(lookup[l]); + subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); //sort groups to be alphanumeric - if (lookup[k]->getGroup() > lookup[l]->getGroup()) { - outputFileHandle << (lookup[l]->getGroup() +'\t' + lookup[k]->getGroup()) << '\t'; //print out groups + if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) { + outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups }else{ - outputFileHandle << (lookup[k]->getGroup() +'\t' + lookup[l]->getGroup()) << '\t'; //print out groups + outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups } for(int i=0;igetSharedRAbundVectors(); - count++; - } - - //reset groups parameter - globaldata->Groups.clear(); globaldata->setGroups(""); - - //close files - outputFileHandle.close(); - if (mult == true) { outAll.close(); } - return 0; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the SummarySharedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the SummarySharedCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the SummarySharedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the SummarySharedCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } -/***********************************************************/ +/***********************************************************/ \ No newline at end of file diff --git a/summarysharedcommand.h b/summarysharedcommand.h index e73714a..0f2f753 100644 --- a/summarysharedcommand.h +++ b/summarysharedcommand.h @@ -53,6 +53,7 @@ private: string outputFileName, format, outAllFileName; ofstream outputFileHandle, outAll; bool mult; + void process(vector); }; diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index a21c47a..7f97ef3 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -94,6 +94,7 @@ int TreeGroupCommand::execute(){ input = globaldata->ginput; lookup = input->getSharedRAbundVectors(); + lastLookup = lookup; if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; return 0; } @@ -314,11 +315,7 @@ void TreeGroupCommand::makeSimsDist() { void TreeGroupCommand::makeSimsShared() { try { int count = 1; - EstOutput data; - vector subset; - numGroups = globaldata->Groups.size(); - //clear globaldatas old tree names if any globaldata->Treenames.clear(); @@ -330,11 +327,75 @@ void TreeGroupCommand::makeSimsShared() { tmap->makeSim(globaldata->gGroupmap); globaldata->gTreemap = tmap; - while(lookup[0] != NULL){ + set processedLabels; + set userLabels = globaldata->labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){ - cout << lookup[0]->getLabel() << '\t' << count << endl; + process(lookup); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + } + + if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) { + cout << lastLookup[0]->getLabel() << '\t' << count << endl; + process(lastLookup); + + processedLabels.insert(lastLookup[0]->getLabel()); + userLabels.erase(lastLookup[0]->getLabel()); + } + + //prevent memory leak + if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } } + lastLookup = lookup; + + //get next line to process + lookup = input->getSharedRAbundVectors(); + count++; + } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastLookup[0]->getLabel()) != 1) { + cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + cout << lastLookup[0]->getLabel() << '\t' << count << endl; + process(lastLookup); + } + + for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } + for(int i = 0 ; i < treeCalculators.size(); i++) { delete treeCalculators[i]; } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************/ +void TreeGroupCommand::process(vector thisLookup) { + try{ + EstOutput data; + vector subset; + numGroups = globaldata->Groups.size(); //for each calculator for(int i = 0 ; i < treeCalculators.size(); i++) { @@ -352,16 +413,16 @@ void TreeGroupCommand::makeSimsShared() { for (int g = 0; g < numGroups; g++) { index[g] = g; } //create a new filename - outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + lookup[0]->getLabel() + ".tre"; + outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre"; - for (int k = 0; k < lookup.size(); k++) { - for (int l = k; l < lookup.size(); l++) { + for (int k = 0; k < thisLookup.size(); k++) { + for (int l = k; l < thisLookup.size(); l++) { if (k != l) { //we dont need to similiarity of a groups to itself //get estimated similarity between 2 groups subset.clear(); //clear out old pair of sharedrabunds //add new pair of sharedrabunds - subset.push_back(lookup[k]); subset.push_back(lookup[l]); + subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); data = treeCalculators[i]->getValues(subset); //saves the calculator outputs //save values in similarity matrix @@ -374,28 +435,18 @@ void TreeGroupCommand::makeSimsShared() { //creates tree from similarity matrix and write out file createTree(); } - } - - //prevent memory leak - for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } - - //get next line to process - lookup = input->getSharedRAbundVectors(); - count++; - } - - for(int i = 0 ; i < treeCalculators.size(); i++) { delete treeCalculators[i]; } + } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the TreeGroupCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } - /***********************************************************/ + diff --git a/treegroupscommand.h b/treegroupscommand.h index 3f145e9..989c699 100644 --- a/treegroupscommand.h +++ b/treegroupscommand.h @@ -58,10 +58,13 @@ private: InputData* input; ValidCalculators* validCalculator; vector lookup; + vector lastLookup; string format, outputFile, groupNames, filename; int numGroups; ofstream out; float precision, cutoff; + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + void process(vector); }; diff --git a/venncommand.cpp b/venncommand.cpp index ced4c48..519e6cb 100644 --- a/venncommand.cpp +++ b/venncommand.cpp @@ -87,7 +87,9 @@ VennCommand::~VennCommand(){ int VennCommand::execute(){ try { - int count = 1; + int count = 1; + SAbundVector* lastSAbund; + vector lastLookup; //if the users entered no valid calculators don't execute command if (vennCalculators.size() == 0) { return 0; } @@ -99,60 +101,138 @@ int VennCommand::execute(){ input = globaldata->ginput; lookup = input->getSharedRAbundVectors(); + lastLookup = lookup; }else if (format == "list") { //you are using just a list file and have only one group read = new ReadOTUFile(globaldata->inputFileName); read->read(&*globaldata); sabund = globaldata->sabund; + lastSAbund = globaldata->sabund; input = globaldata->ginput; } - + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = globaldata->labels; if (format != "list") { - while(lookup[0] != NULL){ + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){ - cout << lookup[0]->getLabel() << '\t' << count << endl; + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); if (lookup.size() > 4) { cout << "Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile." << endl; - for (int i = lookup.size(); i > 3; i--) { delete lookup[i]; lookup.pop_back(); } + for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor } - - //util->getSharedVectors(globaldata->Groups, lookup, order); //fills group vectors from order vector. venn->getPic(lookup, vennCalculators); } - //prevent memory leak - for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) { + cout << lastLookup[0]->getLabel() << '\t' << count << endl; + processedLabels.insert(lastLookup[0]->getLabel()); + userLabels.erase(lastLookup[0]->getLabel()); + + if (lastLookup.size() > 4) { + cout << "Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile." << endl; + for (int i = lastLookup.size(); i > 4; i--) { lastLookup.pop_back(); } //no memmory leak because pop_back calls destructor + } + venn->getPic(lastLookup, vennCalculators); + } + //prevent memory leak + if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } } + lastLookup = lookup; + //get next line to process lookup = input->getSharedRAbundVectors(); count++; } + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastLookup[0]->getLabel()) != 1) { + cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + cout << lastLookup[0]->getLabel() << '\t' << count << endl; + if (lastLookup.size() > 4) { + cout << "Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile." << endl; + for (int i = lastLookup.size(); i > 3; i--) { delete lastLookup[i]; lastLookup.pop_back(); } + } + venn->getPic(lastLookup, vennCalculators); + } + + for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } + //reset groups parameter globaldata->Groups.clear(); }else{ - while(sabund != NULL){ + while((sabund != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(sabund->getLabel()) == 1){ cout << sabund->getLabel() << '\t' << count << endl; venn->getPic(sabund, vennCalculators); + + processedLabels.insert(sabund->getLabel()); + userLabels.erase(sabund->getLabel()); } - delete sabund; + if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastSAbund->getLabel()) != 1)) { + + cout << lastSAbund->getLabel() << '\t' << count << endl; + venn->getPic(lastSAbund, vennCalculators); + + processedLabels.insert(lastSAbund->getLabel()); + userLabels.erase(lastSAbund->getLabel()); + } + + if (count != 1) { delete lastSAbund; } + lastSAbund = sabund; + sabund = input->getSAbundVector(); count++; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + cout << "Your file does not include the label "<< *it; + if (processedLabels.count(lastSAbund->getLabel()) != 1) { + cout << ". I will use " << lastSAbund->getLabel() << "." << endl; + needToRun = true; + }else { + cout << ". Please refer to " << lastSAbund->getLabel() << "." << endl; + } + } + + //run last line if you need to + if (needToRun == true) { + cout << lastSAbund->getLabel() << '\t' << count << endl; + venn->getPic(lastSAbund, vennCalculators); + } + delete lastSAbund; } + globaldata->setGroups(""); return 0; }