From: Sarah Westcott Date: Thu, 17 Oct 2013 18:20:30 +0000 (-0400) Subject: adding labels to list file. X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=499f4ac6e321f9f03d4c3aa25c3b6880892c8b83 adding labels to list file. --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 4fb82f5..a26db61 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -2410,7 +2410,7 @@ "DSTROOT[sdk=*]" = TARGET_BUILD_DIR; GCC_DYNAMIC_NO_PIC = NO; GCC_MODEL_TUNING = G5; - GCC_OPTIMIZATION_LEVEL = 3; + GCC_OPTIMIZATION_LEVEL = 0; "INSTALL_PATH[sdk=*]" = TARGET_BUILD_DIR; PRODUCT_NAME = mothur; SDKROOT = macosx; @@ -2426,7 +2426,7 @@ DEPLOYMENT_LOCATION = YES; DSTROOT = TARGET_BUILD_DIR; GCC_MODEL_TUNING = G5; - GCC_OPTIMIZATION_LEVEL = 3; + GCC_OPTIMIZATION_LEVEL = 0; GCC_WARN_UNUSED_VALUE = YES; PRODUCT_NAME = mothur; SDKROOT = macosx; diff --git a/binsequencecommand.cpp b/binsequencecommand.cpp index f9784b3..738c324 100644 --- a/binsequencecommand.cpp +++ b/binsequencecommand.cpp @@ -398,6 +398,7 @@ int BinSeqCommand::process(ListVector* list) { m->mothurOut(list->getLabel()); m->mothurOutEndLine(); //for each bin in the list vector + vector binLabels = list->getLabels(); for (int i = 0; i < list->size(); i++) { if (m->control_pressed) { return 1; } @@ -421,11 +422,11 @@ int BinSeqCommand::process(ListVector* list) { } if (groups.size() != 0) { groupInfo += groups[groups.size()-1]; } else { groupInfo = "not found"; } - name = name + "\t" + groupInfo + "\t" + toString(i+1)+ "\tNumRep=" + toString(ct.getNumSeqs(name)); + name = name + "\t" + groupInfo + "\t" + binLabels[i] + "\tNumRep=" + toString(ct.getNumSeqs(name)); out << ">" << name << endl; out << sequence << endl; }else { - name = name + "\t" + toString(i+1) + "\tNumRep=" + toString(ct.getNumSeqs(name)); + name = name + "\t" + binLabels[i] + "\tNumRep=" + toString(ct.getNumSeqs(name)); out << ">" << name << endl; out << sequence << endl; } @@ -435,7 +436,7 @@ int BinSeqCommand::process(ListVector* list) { if (sequence != "not found") { //if you don't have groups if (groupfile == "") { - name = name + "\t" + toString(i+1); + name = name + "\t" + binLabels[i]; out << ">" << name << endl; out << sequence << endl; }else {//if you do have groups @@ -444,7 +445,7 @@ int BinSeqCommand::process(ListVector* list) { m->mothurOut(name + " is missing from your group file. Please correct. "); m->mothurOutEndLine(); return 1; }else{ - name = name + "\t" + group + "\t" + toString(i+1); + name = name + "\t" + group + "\t" + binLabels[i]; out << ">" << name << endl; out << sequence << endl; } diff --git a/classifyotucommand.cpp b/classifyotucommand.cpp index 217bc86..160928f 100644 --- a/classifyotucommand.cpp +++ b/classifyotucommand.cpp @@ -586,6 +586,7 @@ int ClassifyOtuCommand::process(ListVector* processList) { //for each bin in the list vector string snumBins = toString(processList->getNumBins()); + vector binLabels = processList->getLabels(); for (int i = 0; i < processList->getNumBins(); i++) { if (m->control_pressed) { break; } @@ -598,17 +599,8 @@ int ClassifyOtuCommand::process(ListVector* processList) { names = findConsensusTaxonomy(thisNames, size, conTax); if (m->control_pressed) { break; } - - //output to new names file - string binLabel = "Otu"; - string sbinNumber = toString(i+1); - if (sbinNumber.length() < snumBins.length()) { - int diff = snumBins.length() - sbinNumber.length(); - for (int h = 0; h < diff; h++) { binLabel += "0"; } - } - binLabel += sbinNumber; - out << binLabel << '\t' << size << '\t' << conTax << endl; + out << binLabels[i] << '\t' << size << '\t' << conTax << endl; string noConfidenceConTax = conTax; m->removeConfidences(noConfidenceConTax); @@ -683,16 +675,8 @@ int ClassifyOtuCommand::process(ListVector* processList) { if (m->control_pressed) { break; } - //output to new names file - string binLabel = "Otu"; - string sbinNumber = toString(i+1); - if (sbinNumber.length() < snumBins.length()) { - int diff = snumBins.length() - sbinNumber.length(); - for (int h = 0; h < diff; h++) { binLabel += "0"; } - } - binLabel += sbinNumber; - (*outs[groupIndex[itParsed->first]]) << binLabel << '\t' << size << '\t' << conTax << endl; + (*outs[groupIndex[itParsed->first]]) << binLabels[i] << '\t' << size << '\t' << conTax << endl; string noConfidenceConTax = conTax; m->removeConfidences(noConfidenceConTax); diff --git a/clustercommand.cpp b/clustercommand.cpp index c1ac5bb..5ac4166 100644 --- a/clustercommand.cpp +++ b/clustercommand.cpp @@ -361,7 +361,7 @@ int ClusterCommand::execute(){ } m->openOutputFile(listFileName, listFile); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); - + list->printHeaders(listFile); time_t estart = time(NULL); float previousDist = 0.00000; diff --git a/clusterdoturcommand.cpp b/clusterdoturcommand.cpp index 96bfc5c..dadc9cc 100644 --- a/clusterdoturcommand.cpp +++ b/clusterdoturcommand.cpp @@ -259,6 +259,7 @@ int ClusterDoturCommand::execute(){ } m->openOutputFile(listFileName, listFile); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); + list->printHeaders(listFile); float previousDist = 0.00000; float rndPreviousDist = 0.00000; diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index b5dc969..874eb6d 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -833,8 +833,10 @@ int ClusterSplitCommand::mergeLists(vector listNames, map us m->openOutputFile(listFileName, outList); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); - map::iterator itLabel; + + //clears out junk for autocompleting of list files above. Perhaps there is a beter way to handle this from within the data structure? + m->printedListHeaders = false; //for each label needed for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) { @@ -890,6 +892,8 @@ int ClusterSplitCommand::mergeLists(vector listNames, map us rabund->print(outRabund); } //outList << endl; + if (!m->printedListHeaders) { + m->listBinLabelsInFile.clear(); completeList.printHeaders(outList); } completeList.print(outList); if (rabund != NULL) { delete rabund; } diff --git a/collectsharedcommand.cpp b/collectsharedcommand.cpp index dd1ec27..40ee3d2 100644 --- a/collectsharedcommand.cpp +++ b/collectsharedcommand.cpp @@ -89,7 +89,7 @@ string CollectSharedCommand::getHelpString(){ helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n"; helpString += "If you use sharedchao and run into memory issues, set all to false. \n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n"; - helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n"; + helpString += "Note: No spaces between parameter labels (i.e. shared), '=' and parameters (i.e.yourSharedfile).\n"; return helpString; } catch(exception& e) { diff --git a/consensusseqscommand.cpp b/consensusseqscommand.cpp index fbec746..d9a5e29 100644 --- a/consensusseqscommand.cpp +++ b/consensusseqscommand.cpp @@ -436,23 +436,16 @@ int ConsensusSeqsCommand::processList(ListVector*& list){ outSummary << "OTU#\tPositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl; string snumBins = toString(list->getNumBins()); + vector binLabels = list->getLabels(); for (int i = 0; i < list->getNumBins(); i++) { if (m->control_pressed) { outSummary.close(); outName.close(); outFasta.close(); return 0; } string bin = list->get(i); string consSeq = getConsSeq(bin, outSummary, i); - - string seqName = "Otu"; - string sbinNumber = toString(i+1); - if (sbinNumber.length() < snumBins.length()) { - int diff = snumBins.length() - sbinNumber.length(); - for (int h = 0; h < diff; h++) { seqName += "0"; } - } - seqName += sbinNumber; - outFasta << ">" << seqName << endl << consSeq << endl; - outName << seqName << '\t' << seqName << "," << bin << endl; + outFasta << ">" << binLabels[i] << endl << consSeq << endl; + outName << binLabels[i] << '\t' << binLabels[i] << "," << bin << endl; } outSummary.close(); outName.close(); outFasta.close(); diff --git a/corraxescommand.cpp b/corraxescommand.cpp index 853c174..72fa03b 100644 --- a/corraxescommand.cpp +++ b/corraxescommand.cpp @@ -339,7 +339,7 @@ int CorrAxesCommand::calcPearson(map >& axes, ofstream& ou //for each otu for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) { - if (metadatafile == "") { out << m->currentBinLabels[i]; } + if (metadatafile == "") { out << m->currentSharedBinLabels[i]; } else { out << metadataLabels[i]; } //find the averages this otu - Y @@ -474,7 +474,7 @@ int CorrAxesCommand::calcSpearman(map >& axes, ofstream& o //for each otu for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) { - if (metadatafile == "") { out << m->currentBinLabels[i]; } + if (metadatafile == "") { out << m->currentSharedBinLabels[i]; } else { out << metadataLabels[i]; } //find the ranks of this otu - Y @@ -627,7 +627,7 @@ int CorrAxesCommand::calcKendall(map >& axes, ofstream& ou //for each otu for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) { - if (metadatafile == "") { out << m->currentBinLabels[i]; } + if (metadatafile == "") { out << m->currentSharedBinLabels[i]; } else { out << metadataLabels[i]; } //find the ranks of this otu - Y @@ -834,7 +834,7 @@ int CorrAxesCommand::eliminateZeroOTUS(vector& thisloo for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; - if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; } newBinLabels.push_back(binLabel); } @@ -843,7 +843,7 @@ int CorrAxesCommand::eliminateZeroOTUS(vector& thisloo for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; - m->currentBinLabels = newBinLabels; + m->currentSharedBinLabels = newBinLabels; return 0; diff --git a/createdatabasecommand.cpp b/createdatabasecommand.cpp index 235682b..a353166 100644 --- a/createdatabasecommand.cpp +++ b/createdatabasecommand.cpp @@ -364,11 +364,15 @@ int CreateDatabaseCommand::execute(){ header += "repSeqName\trepSeq\tOTUConTaxonomy"; out << header << endl; + vector binLabels = list->getLabels(); for (int i = 0; i < list->getNumBins(); i++) { + int index = findIndex(otuLabels, binLabels[i]); + if (index == -1) { m->mothurOut("[ERROR]: " + binLabels[i] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; } + if (m->control_pressed) { break; } - out << otuLabels[i] << '\t'; + out << otuLabels[index] << '\t'; vector binNames; string bin = list->get(i); @@ -387,12 +391,12 @@ int CreateDatabaseCommand::execute(){ map::iterator it = repNames.find(bin); if (it == repNames.end()) { - m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; }else { seqRepName = it->second; numSeqsRep = binNames.size(); } //sanity check - if (binNames.size() != classifyOtuSizes[i]) { - m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + if (binNames.size() != classifyOtuSizes[index]) { + m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; } }else { //find rep sequence in bin @@ -406,11 +410,11 @@ int CreateDatabaseCommand::execute(){ } if (seqRepName == "") { - m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the count file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the count file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; } if (numSeqsRep != classifyOtuSizes[i]) { - m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(numSeqsRep) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(numSeqsRep) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; } } @@ -443,7 +447,7 @@ int CreateDatabaseCommand::execute(){ }else { out << numSeqsRep << '\t'; } //output repSeq - out << seqRepName << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl; + out << seqRepName << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl; } @@ -462,8 +466,8 @@ int CreateDatabaseCommand::execute(){ if (m->control_pressed) { break; } - int index = findIndex(otuLabels, m->currentBinLabels[h]); - if (index == -1) { m->mothurOut("[ERROR]: " + m->currentBinLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; } + int index = findIndex(otuLabels, m->currentSharedBinLabels[h]); + if (index == -1) { m->mothurOut("[ERROR]: " + m->currentSharedBinLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; } if (m->control_pressed) { break; } @@ -478,7 +482,7 @@ int CreateDatabaseCommand::execute(){ //sanity check if (totalAbund != classifyOtuSizes[index]) { - m->mothurOut("[WARNING]: OTU " + m->currentBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); //m->control_pressed = true; break; + m->mothurOut("[WARNING]: OTU " + m->currentSharedBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); //m->control_pressed = true; break; } //output repSeq diff --git a/datavector.hpp b/datavector.hpp index e6c839d..dfcd684 100644 --- a/datavector.hpp +++ b/datavector.hpp @@ -23,10 +23,10 @@ class DataVector { public: DataVector(){ m = MothurOut::getInstance(); }// : maxRank(0), numBins(0), numSeqs(0){}; - DataVector(string l) : label(l) {}; - DataVector(const DataVector& dv) : label(dv.label){};//, maxRank(dv.maxRank), numBins(dv.numBins), numSeqs(dv.numSeqs) {}; - DataVector(ifstream&); - DataVector(ifstream&, GroupMap*); + DataVector(string l) : label(l) { m = MothurOut::getInstance();}; + DataVector(const DataVector& dv) : label(dv.label){ m = MothurOut::getInstance();};//, maxRank(dv.maxRank), numBins(dv.numBins), numSeqs(dv.numSeqs) {}; + DataVector(ifstream&) {m = MothurOut::getInstance();} + DataVector(ifstream&, GroupMap*){m = MothurOut::getInstance();} virtual ~DataVector(){}; // virtual int getNumBins() { return numBins; } diff --git a/engine.cpp b/engine.cpp index e4e1071..d6be4e4 100644 --- a/engine.cpp +++ b/engine.cpp @@ -123,10 +123,12 @@ bool InteractEngine::getInput(){ mout->clearAllGroups(); mout->Treenames.clear(); mout->saveNextLabel = ""; - mout->printedHeaders = false; - mout->commandInputsConvertError = false; - mout->currentBinLabels.clear(); - mout->binLabelsInFile.clear(); + mout->commandInputsConvertError = false; + mout->printedSharedHeaders = false; + mout->currentSharedBinLabels.clear(); + mout->sharedBinLabelsInFile.clear(); + mout->printedListHeaders = false; + mout->listBinLabelsInFile.clear(); Command* command = cFactory->getCommand(commandName, options); if (mout->commandInputsConvertError) { quitCommandCalled = 2; } @@ -309,10 +311,12 @@ bool BatchEngine::getInput(){ mout->clearAllGroups(); mout->Treenames.clear(); mout->saveNextLabel = ""; - mout->printedHeaders = false; mout->commandInputsConvertError = false; - mout->currentBinLabels.clear(); - mout->binLabelsInFile.clear(); + mout->printedSharedHeaders = false; + mout->currentSharedBinLabels.clear(); + mout->sharedBinLabelsInFile.clear(); + mout->printedListHeaders = false; + mout->listBinLabelsInFile.clear(); Command* command = cFactory->getCommand(commandName, options); @@ -480,10 +484,12 @@ bool ScriptEngine::getInput(){ mout->clearAllGroups(); mout->Treenames.clear(); mout->saveNextLabel = ""; - mout->printedHeaders = false; - mout->commandInputsConvertError = false; - mout->currentBinLabels.clear(); - mout->binLabelsInFile.clear(); + mout->commandInputsConvertError = false; + mout->printedSharedHeaders = false; + mout->currentSharedBinLabels.clear(); + mout->sharedBinLabelsInFile.clear(); + mout->printedListHeaders = false; + mout->listBinLabelsInFile.clear(); Command* command = cFactory->getCommand(commandName, options); if (mout->commandInputsConvertError) { quitCommandCalled = 2; } diff --git a/filtersharedcommand.cpp b/filtersharedcommand.cpp index 5b5fcb1..a2510a6 100644 --- a/filtersharedcommand.cpp +++ b/filtersharedcommand.cpp @@ -325,7 +325,7 @@ int FilterSharedCommand::processShared(vector& thislookup) try { //save mothurOut's binLabels to restore for next label - vector saveBinLabels = m->currentBinLabels; + vector saveBinLabels = m->currentSharedBinLabels; map variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); @@ -474,7 +474,7 @@ int FilterSharedCommand::processShared(vector& thislookup) m->openOutputFile(outputFileName, out); outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); - m->currentBinLabels = filteredLabels; + m->currentSharedBinLabels = filteredLabels; filteredLookup[0]->printHeaders(out); @@ -486,7 +486,7 @@ int FilterSharedCommand::processShared(vector& thislookup) //save mothurOut's binLabels to restore for next label - m->currentBinLabels = saveBinLabels; + m->currentSharedBinLabels = saveBinLabels; for (int j = 0; j < filteredLookup.size(); j++) { delete filteredLookup[j]; } diff --git a/getcoremicrobiomecommand.cpp b/getcoremicrobiomecommand.cpp index 5fbb781..2a81ace 100644 --- a/getcoremicrobiomecommand.cpp +++ b/getcoremicrobiomecommand.cpp @@ -352,14 +352,14 @@ int GetCoreMicroBiomeCommand::createTable(vector& look for (int k = 0; k < counts[j]; k++) { table[j][k]++; } if ((abund == -1) && (samples != -1)) { //we want all OTUs with this number of samples - if (counts[j] >= samples) { otuNames[j].push_back(m->currentBinLabels[i]); } + if (counts[j] >= samples) { otuNames[j].push_back(m->currentSharedBinLabels[i]); } }else if ((abund != -1) && (samples == -1)) { //we want all OTUs with this relabund if (j == abund) { - for (int k = 0; k < counts[j]; k++) { otuNames[k+1].push_back(m->currentBinLabels[i]); } + for (int k = 0; k < counts[j]; k++) { otuNames[k+1].push_back(m->currentSharedBinLabels[i]); } } }else if ((abund != -1) && (samples != -1)) { //we want only OTUs with this relabund for this number of samples if ((j == abund) && (counts[j] >= samples)) { - otuNames[j].push_back(m->currentBinLabels[i]); + otuNames[j].push_back(m->currentSharedBinLabels[i]); } } } diff --git a/getgroupscommand.cpp b/getgroupscommand.cpp index 15dcbba..bb15a3f 100644 --- a/getgroupscommand.cpp +++ b/getgroupscommand.cpp @@ -68,7 +68,7 @@ string GetGroupsCommand::getOutputPattern(string type) { else if (type == "name") { pattern = "[filename],pick,[extension]"; } else if (type == "group") { pattern = "[filename],pick,[extension]"; } else if (type == "count") { pattern = "[filename],pick,[extension]"; } - else if (type == "list") { pattern = "[filename],pick,[extension]"; } + else if (type == "list") { pattern = "[filename],[tag],pick,[extension]"; } else if (type == "shared") { pattern = "[filename],[tag],pick,[extension]"; } else if (type == "design") { pattern = "[filename],pick,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } @@ -585,10 +585,6 @@ int GetGroupsCommand::readList(){ map variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); variables["[extension]"] = m->getExtension(listfile); - string outputFileName = getOutputFileName("list", variables); - - ofstream out; - m->openOutputFile(outputFileName, out); ifstream in; m->openInputFile(listfile, in); @@ -599,9 +595,19 @@ int GetGroupsCommand::readList(){ while(!in.eof()){ selectedCount = 0; - + //read in list vector ListVector list(in); + + variables["[tag]"] = list.getLabel(); + string outputFileName = getOutputFileName("list", variables); + + ofstream out; + m->openOutputFile(outputFileName, out); + outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); + + vector binLabels = list.getLabels(); + vector newBinLabels; //make a new list vector ListVector newList; @@ -613,13 +619,14 @@ int GetGroupsCommand::readList(){ //parse out names that are in accnos file string binnames = list.get(i); + vector thisBinNames; + m->splitAtComma(binnames, thisBinNames); string newNames = ""; - while (binnames.find_first_of(',') != -1) { - string name = binnames.substr(0,binnames.find_first_of(',')); - binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length()); - - //if that name is in the .accnos file, add it + for (int j = 0; j < thisBinNames.size(); j++) { + string name = thisBinNames[j]; + + //if that name is in the .accnos file, add it if (names.count(name) != 0) { newNames += name + ","; selectedCount++; } else{ //if you are not in the accnos file check if you are a name that needs to be changed @@ -629,39 +636,30 @@ int GetGroupsCommand::readList(){ selectedCount++; } } - } - - //get last name - if (names.count(binnames) != 0) { newNames += binnames + ","; selectedCount++; } - else{ - //if you are not in the accnos file check if you are a name that needs to be changed - map::iterator it = uniqueToRedundant.find(binnames); - if (it != uniqueToRedundant.end()) { - newNames += it->second + ","; - selectedCount++; - } - } - + } + //if there are names in this bin add to new list if (newNames != "") { newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma - newList.push_back(newNames); + newList.push_back(newNames); + newBinLabels.push_back(binLabels[i]); } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; + newList.setLabels(newBinLabels); + newList.printHeaders(out); newList.print(out); } m->gobble(in); + out.close(); } - in.close(); - out.close(); + in.close(); if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); } - outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine(); diff --git a/getlineagecommand.cpp b/getlineagecommand.cpp index 862aef7..ec5ef61 100644 --- a/getlineagecommand.cpp +++ b/getlineagecommand.cpp @@ -75,7 +75,7 @@ string GetLineageCommand::getOutputPattern(string type) { else if (type == "name") { pattern = "[filename],pick,[extension]"; } else if (type == "group") { pattern = "[filename],pick,[extension]"; } else if (type == "count") { pattern = "[filename],pick,[extension]"; } - else if (type == "list") { pattern = "[filename],pick,[extension]-[filename],[distance],pick,[extension]"; } + else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; } else if (type == "shared") { pattern = "[filename],[distance],pick,[extension]"; } else if (type == "alignreport") { pattern = "[filename],pick.align.report"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } @@ -537,18 +537,13 @@ int GetLineageCommand::readList(){ map variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); variables["[extension]"] = m->getExtension(listfile); - string outputFileName = getOutputFileName("list", variables); - ofstream out; - m->openOutputFile(outputFileName, out); - + ifstream in; m->openInputFile(listfile, in); bool wroteSomething = false; while(!in.eof()){ - - if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; } //read in list vector ListVector list(in); @@ -556,6 +551,18 @@ int GetLineageCommand::readList(){ //make a new list vector ListVector newList; newList.setLabel(list.getLabel()); + + variables["[distance]"] = list.getLabel(); + string outputFileName = getOutputFileName("list", variables); + + ofstream out; + m->openOutputFile(outputFileName, out); + outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); + + if (m->control_pressed) { in.close(); out.close(); return 0; } + + vector binLabels = list.getLabels(); + vector newBinLabels; //for each bin for (int i = 0; i < list.getNumBins(); i++) { @@ -576,23 +583,26 @@ int GetLineageCommand::readList(){ //if there are names in this bin add to new list if (newNames != "") { newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma - newList.push_back(newNames); + newList.push_back(newNames); + newBinLabels.push_back(binLabels[i]); } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; + newList.setLabels(newBinLabels); + newList.printHeaders(out); newList.print(out); } m->gobble(in); + out.close(); } in.close(); - out.close(); + if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); } - outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); return 0; @@ -615,6 +625,8 @@ int GetLineageCommand::readConsList(){ bool wroteSomething = false; string snumBins = toString(list->getNumBins()); + vector binLabels = list->getLabels(); + vector newBinLabels; for (int i = 0; i < list->getNumBins(); i++) { if (m->control_pressed) { delete list; return 0;} @@ -631,6 +643,7 @@ int GetLineageCommand::readConsList(){ if (names.count(m->getSimpleLabel(otuLabel)) != 0) { selectedCount++; newList.push_back(list->get(i)); + newBinLabels.push_back(binLabels[i]); } } @@ -648,6 +661,8 @@ int GetLineageCommand::readConsList(){ //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; + newList.setLabels(newBinLabels); + newList.printHeaders(out); newList.print(out); } out.close(); @@ -767,9 +782,9 @@ int GetLineageCommand::readShared(){ if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; } //is this otu on the list - if (names.count(m->getSimpleLabel(m->currentBinLabels[i])) != 0) { + if (names.count(m->getSimpleLabel(m->currentSharedBinLabels[i])) != 0) { numSelected++; wroteSomething = true; - newLabels.push_back(m->currentBinLabels[i]); + newLabels.push_back(m->currentSharedBinLabels[i]); for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup()); } @@ -789,7 +804,7 @@ int GetLineageCommand::readShared(){ for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } - m->currentBinLabels = newLabels; + m->currentSharedBinLabels = newLabels; newLookup[0]->printHeaders(out); diff --git a/getlistcountcommand.cpp b/getlistcountcommand.cpp index a4d7b83..2f194a8 100644 --- a/getlistcountcommand.cpp +++ b/getlistcountcommand.cpp @@ -260,19 +260,20 @@ void GetListCountCommand::process(ListVector* list) { m->mothurOut(list->getLabel()); m->mothurOutEndLine(); //for each bin in the list vector + vector binLabels = list->getLabels(); for (int i = 0; i < list->getNumBins(); i++) { if (m->control_pressed) { break; } binnames = list->get(i); if (sort == "otu") { - out << i+1 << '\t' << binnames << endl; + out << binLabels[i] << '\t' << binnames << endl; }else{ //sort = name vector names; m->splitAtComma(binnames, names); for (int j = 0; j < names.size(); j++) { - out << names[j] << '\t' << i+1 << endl; + out << names[j] << '\t' << binLabels[i] << endl; } } } diff --git a/getmetacommunitycommand.cpp b/getmetacommunitycommand.cpp index 8f78ca2..08cd35a 100644 --- a/getmetacommunitycommand.cpp +++ b/getmetacommunitycommand.cpp @@ -577,7 +577,7 @@ int GetMetaCommunityCommand::processDriver(vector& thislook outputNames.push_back(matrixName); outputTypes["matrix"].push_back(matrixName); findQ.printZMatrix(matrixName, thisGroups); - findQ.printRelAbund(relabund, m->currentBinLabels); + findQ.printRelAbund(relabund, m->currentSharedBinLabels); if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){ string tempDoneFile = m->getRootName(m->getSimpleName(sharedfile)) + toString(processID) + ".done.temp"; diff --git a/getotulabelscommand.cpp b/getotulabelscommand.cpp index 00297e2..452d7f6 100644 --- a/getotulabelscommand.cpp +++ b/getotulabelscommand.cpp @@ -465,9 +465,9 @@ int GetOtuLabelsCommand::readShared(){ if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; } //is this otu on the list - if (labels.count(m->getSimpleLabel(m->currentBinLabels[i])) != 0) { + if (labels.count(m->getSimpleLabel(m->currentSharedBinLabels[i])) != 0) { numSelected++; wroteSomething = true; - newLabels.push_back(m->currentBinLabels[i]); + newLabels.push_back(m->currentSharedBinLabels[i]); for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup()); } @@ -487,7 +487,7 @@ int GetOtuLabelsCommand::readShared(){ for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } - m->currentBinLabels = newLabels; + m->currentSharedBinLabels = newLabels; newLookup[0]->printHeaders(out); @@ -523,22 +523,16 @@ int GetOtuLabelsCommand::readList(){ bool wroteSomething = false; string snumBins = toString(list->getNumBins()); + vector binLabels = list->getLabels(); + vector newLabels; for (int i = 0; i < list->getNumBins(); i++) { if (m->control_pressed) { delete list; return 0;} - //create a label for this otu - string otuLabel = "Otu"; - string sbinNumber = toString(i+1); - if (sbinNumber.length() < snumBins.length()) { - int diff = snumBins.length() - sbinNumber.length(); - for (int h = 0; h < diff; h++) { otuLabel += "0"; } - } - otuLabel += sbinNumber; - - if (labels.count(m->getSimpleLabel(otuLabel)) != 0) { + if (labels.count(m->getSimpleLabel(binLabels[i])) != 0) { selectedCount++; newList.push_back(list->get(i)); + newLabels.push_back(binLabels[i]); } } @@ -556,6 +550,8 @@ int GetOtuLabelsCommand::readList(){ //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; + newList.setLabels(newLabels); + newList.printHeaders(out); newList.print(out); } out.close(); diff --git a/getotuscommand.cpp b/getotuscommand.cpp index 335ebc4..3ca7f5d 100644 --- a/getotuscommand.cpp +++ b/getotuscommand.cpp @@ -244,9 +244,16 @@ int GetOtusCommand::execute(){ //********************************************************************************************************************** int GetOtusCommand::readListGroup(){ try { - string thisOutputDir = outputDir; + InputData* input = new InputData(listfile, "list"); + ListVector* list = input->getListVector(); + string lastLabel = list->getLabel(); + + //using first label seen if none is provided + if (label == "") { label = lastLabel; } + + string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } - map variables; + map variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); variables["[tag]"] = label; variables["[extension]"] = m->getExtension(listfile); @@ -254,8 +261,8 @@ int GetOtusCommand::readListGroup(){ ofstream out; m->openOutputFile(outputFileName, out); - - string GroupOutputDir = outputDir; + + string GroupOutputDir = outputDir; if (outputDir == "") { GroupOutputDir += m->hasPath(groupfile); } variables["[filename]"] = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile)); variables["[extension]"] = m->getExtension(groupfile); @@ -263,13 +270,7 @@ int GetOtusCommand::readListGroup(){ ofstream outGroup; m->openOutputFile(outputGroupFileName, outGroup); - - InputData* input = new InputData(listfile, "list"); - ListVector* list = input->getListVector(); - string lastLabel = list->getLabel(); - - //using first label seen if none is provided - if (label == "") { label = lastLabel; } + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set labels; labels.insert(label); @@ -366,6 +367,8 @@ int GetOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream int numOtus = 0; //for each bin + vector binLabels = list->getLabels(); + vector newBinLabels; for (int i = 0; i < list->getNumBins(); i++) { if (m->control_pressed) { return 0; } @@ -399,7 +402,8 @@ int GetOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream //if there are sequences from the groups we want in this bin add to new list, output to groupfile if (keepBin) { - newList.push_back(binnames); + newList.push_back(binnames); + newBinLabels.push_back(binLabels[i]); outGroup << groupFileOutput; numOtus++; } @@ -408,7 +412,9 @@ int GetOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; - newList.print(out); + newList.setLabels(newBinLabels); + newList.printHeaders(out); + newList.print(out); } m->mothurOut(newList.getLabel() + " - selected " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine(); diff --git a/getrelabundcommand.cpp b/getrelabundcommand.cpp index f36f1ba..791f563 100644 --- a/getrelabundcommand.cpp +++ b/getrelabundcommand.cpp @@ -192,7 +192,7 @@ int GetRelAbundCommand::execute(){ if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } + if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); } getRelAbundance(lookup, out); processedLabels.insert(lookup[0]->getLabel()); @@ -205,7 +205,7 @@ int GetRelAbundCommand::execute(){ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } + if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); } getRelAbundance(lookup, out); processedLabels.insert(lookup[0]->getLabel()); @@ -246,7 +246,7 @@ int GetRelAbundCommand::execute(){ lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } + if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); } getRelAbundance(lookup, out); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } diff --git a/getseqscommand.cpp b/getseqscommand.cpp index 82f9710..7f0f3dd 100644 --- a/getseqscommand.cpp +++ b/getseqscommand.cpp @@ -88,7 +88,7 @@ string GetSeqsCommand::getOutputPattern(string type) { else if (type == "name") { pattern = "[filename],pick,[extension]"; } else if (type == "group") { pattern = "[filename],pick,[extension]"; } else if (type == "count") { pattern = "[filename],pick,[extension]"; } - else if (type == "list") { pattern = "[filename],pick,[extension]"; } + else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; } else if (type == "qfile") { pattern = "[filename],pick,[extension]"; } else if (type == "accnosreport") { pattern = "[filename],pick.accnos.report"; } else if (type == "alignreport") { pattern = "[filename],pick.align.report"; } @@ -614,9 +614,6 @@ int GetSeqsCommand::readList(){ map variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); variables["[extension]"] = m->getExtension(listfile); - string outputFileName = getOutputFileName("list", variables); - ofstream out; - m->openOutputFile(outputFileName, out); ifstream in; m->openInputFile(listfile, in); @@ -629,8 +626,6 @@ int GetSeqsCommand::readList(){ while(!in.eof()){ selectedCount = 0; - - if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; } //read in list vector ListVector list(in); @@ -638,6 +633,18 @@ int GetSeqsCommand::readList(){ //make a new list vector ListVector newList; newList.setLabel(list.getLabel()); + + variables["[distance]"] = list.getLabel(); + string outputFileName = getOutputFileName("list", variables); + + ofstream out; + m->openOutputFile(outputFileName, out); + outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); + + vector binLabels = list.getLabels(); + vector newBinLabels; + + if (m->control_pressed) { in.close(); out.close(); return 0; } //for each bin for (int i = 0; i < list.getNumBins(); i++) { @@ -657,23 +664,26 @@ int GetSeqsCommand::readList(){ //if there are names in this bin add to new list if (newNames != "") { newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma - newList.push_back(newNames); + newList.push_back(newNames); + newBinLabels.push_back(binLabels[i]); } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; + newList.setLabels(newBinLabels); + newList.printHeaders(out); newList.print(out); } m->gobble(in); + out.close(); } in.close(); - out.close(); + if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); } - outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine(); diff --git a/getsharedotucommand.cpp b/getsharedotucommand.cpp index 1074d32..9302d1d 100644 --- a/getsharedotucommand.cpp +++ b/getsharedotucommand.cpp @@ -431,6 +431,7 @@ int GetSharedOTUCommand::process(ListVector* shared) { int num = 0; //go through each bin, find out if shared + vector binLabels = shared->getLabels(); for (int i = 0; i < shared->getNumBins(); i++) { if (m->control_pressed) { outNames.close(); m->mothurRemove(outputFileNames); return 0; } @@ -452,7 +453,7 @@ int GetSharedOTUCommand::process(ListVector* shared) { //find group string seqGroup = groupMap->getGroup(name); if (output != "accnos") { - namesOfSeqsInThisBin.push_back((name + "|" + seqGroup + "|" + toString(i+1))); + namesOfSeqsInThisBin.push_back((name + "|" + seqGroup + "|" + binLabels[i])); }else { namesOfSeqsInThisBin.push_back(name); } if (seqGroup == "not found") { m->mothurOut(name + " is not in your groupfile. Please correct."); m->mothurOutEndLine(); exit(1); } @@ -691,7 +692,7 @@ int GetSharedOTUCommand::process(vector& lookup) { for(int j = 0; j < lookup.size(); j++) { string seqGroup = lookup[j]->getGroup(); - string name = m->currentBinLabels[i]; + string name = m->currentSharedBinLabels[i]; if (lookup[j]->getAbundance(i) != 0) { if (output != "accnos") { diff --git a/hclustercommand.cpp b/hclustercommand.cpp index b991ccd..06d7260 100644 --- a/hclustercommand.cpp +++ b/hclustercommand.cpp @@ -315,7 +315,9 @@ int HClusterCommand::execute(){ }else{ m->mothurOut("Error: no list vector!"); m->mothurOutEndLine(); return 0; } - + + list->printHeaders(listFile); + float previousDist = 0.00000; float rndPreviousDist = 0.00000; oldRAbund = *rabund; diff --git a/heatmap.cpp b/heatmap.cpp index 367342c..514c7af 100644 --- a/heatmap.cpp +++ b/heatmap.cpp @@ -119,7 +119,7 @@ string HeatMap::getPic(vector lookup) { } //sort lookup so shared bins are on top - vector sortedLabels = m->currentBinLabels; + vector sortedLabels = m->currentSharedBinLabels; if (sorted != "none") { sortedLabels = sortSharedVectors(lookup); } vector > scaleRelAbund; @@ -221,7 +221,7 @@ vector HeatMap::sortSharedVectors(vector& lookup){ map place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2. map::iterator it; - vector sortedLabels = m->currentBinLabels; + vector sortedLabels = m->currentSharedBinLabels; /****************** find order of otus **********************/ if (sorted == "shared") { @@ -254,7 +254,7 @@ vector HeatMap::sortSharedVectors(vector& lookup){ int newAbund = looktemp[j]->getAbundance(i); // 1 -> 3 lookup[j]->set(place[i], newAbund, looktemp[j]->getGroup()); //binNumber, abundance, group } - sortedLabels[place[i]] = m->currentBinLabels[i]; + sortedLabels[place[i]] = m->currentSharedBinLabels[i]; } //delete looktemp -- Sarah look at - this is causing segmentation faults @@ -430,7 +430,7 @@ string HeatMap::getPic(vector lookup) { } //sort lookup so shared bins are on top - vector sortedLabels = m->currentBinLabels; + vector sortedLabels = m->currentSharedBinLabels; if (sorted != "none") { sortedLabels = sortSharedVectors(lookup); } vector > scaleRelAbund; @@ -532,7 +532,7 @@ vector HeatMap::sortSharedVectors(vector& look map place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2. map::iterator it; - vector sortedLabels = m->currentBinLabels; + vector sortedLabels = m->currentSharedBinLabels; /****************** find order of otus **********************/ if (sorted == "shared") { @@ -564,7 +564,7 @@ vector HeatMap::sortSharedVectors(vector& look for (int j = 0; j < looktemp.size(); j++) { // 3 -> 2 float newAbund = looktemp[j]->getAbundance(i); // 1 -> 3 lookup[j]->set(place[i], newAbund, looktemp[j]->getGroup()); //binNumber, abundance, group - sortedLabels[place[i]] = m->currentBinLabels[i]; + sortedLabels[place[i]] = m->currentSharedBinLabels[i]; } } diff --git a/indicatorcommand.cpp b/indicatorcommand.cpp index fd818ac..ab6e670 100644 --- a/indicatorcommand.cpp +++ b/indicatorcommand.cpp @@ -490,17 +490,17 @@ int IndicatorCommand::GetIndicatorSpecies(){ if (m->control_pressed) { out.close(); return 0; } - out << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t'; + out << m->currentSharedBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t'; if (pValues[j] > (1/(float)iters)) { out << pValues[j] << endl; } else { out << "<" << (1/(float)iters) << endl; } if (pValues[j] <= 0.05) { - cout << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t'; + cout << m->currentSharedBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t'; string pValueString = "<" + toString((1/(float)iters)); if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); cout << pValues[j];} else { cout << "<" << (1/(float)iters); } - m->mothurOutJustToLog(m->currentBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString); + m->mothurOutJustToLog(m->currentSharedBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString); m->mothurOutEndLine(); } } @@ -538,7 +538,7 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ //print headings out << "TreeNode\t"; - for (int i = 0; i < numBins; i++) { out << m->currentBinLabels[i] << "_IndGroups" << '\t' << m->currentBinLabels[i] << "_IndValue" << '\t' << "pValue" << '\t'; } + for (int i = 0; i < numBins; i++) { out << m->currentSharedBinLabels[i] << "_IndGroups" << '\t' << m->currentSharedBinLabels[i] << "_IndValue" << '\t' << "pValue" << '\t'; } out << endl; m->mothurOutEndLine(); m->mothurOut("Node\tSpecies\tIndicator_Groups\tIndicatorValue\tpValue\n"); @@ -697,11 +697,11 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ } if (pValues[j] <= 0.05) { - cout << i+1 << '\t' << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t'; + cout << i+1 << '\t' << m->currentSharedBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t'; string pValueString = "<" + toString((1/(float)iters)); if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); cout << pValues[j];} else { cout << "<" << (1/(float)iters); } - m->mothurOutJustToLog(toString(i) + "\t" + m->currentBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString); + m->mothurOutJustToLog(toString(i) + "\t" + m->currentSharedBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString); m->mothurOutEndLine(); } } diff --git a/kruskalwalliscommand.cpp b/kruskalwalliscommand.cpp index 777444b..dc297e1 100644 --- a/kruskalwalliscommand.cpp +++ b/kruskalwalliscommand.cpp @@ -307,7 +307,7 @@ int KruskalWallisCommand::process(vector& lookup, DesignMap double H = linear.calcKruskalWallis(values, pValue); //output H and signifigance - out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl; + out << m->currentSharedBinLabels[i] << '\t' << H << '\t' << pValue << endl; } out.close(); diff --git a/lefsecommand.cpp b/lefsecommand.cpp index 8d1768c..0403c91 100644 --- a/lefsecommand.cpp +++ b/lefsecommand.cpp @@ -1009,8 +1009,8 @@ int LefseCommand::printResults(vector< vector > means, map if (maxMean > logMaxMean) { logMaxMean = maxMean; } logMaxMean = log10(logMaxMean); - out << m->currentBinLabels[i] << '\t' << logMaxMean << '\t'; - if (m->debug) { temp = m->currentBinLabels[i] + '\t' + toString(logMaxMean) + '\t'; } + out << m->currentSharedBinLabels[i] << '\t' << logMaxMean << '\t'; + if (m->debug) { temp = m->currentSharedBinLabels[i] + '\t' + toString(logMaxMean) + '\t'; } map::iterator it = sigLDA.find(i); if (it != sigLDA.end()) { @@ -1040,7 +1040,7 @@ bool LefseCommand::printToCoutForRTesting(vector< vector >& adjustedLook for (map::iterator it = bins.begin(); it != bins.end(); it++) { if (m->control_pressed) { break; } - cout << m->currentBinLabels[it->first] << " <- c("; + cout << m->currentSharedBinLabels[it->first] << " <- c("; for (int h = 0; h < rand_s.size()-1; h++) { cout << (adjustedLookup[count][rand_s[h]]) << ", "; } cout << (adjustedLookup[count][rand_s[rand_s.size()-1]]) << ")\n"; count++; @@ -1096,7 +1096,7 @@ bool LefseCommand::printToCoutForRTesting(vector< vector >& adjustedLook for (map::iterator it = bins.begin(); it != bins.end(); it++) { if (m->control_pressed) { break; } - tempOutput += "\"" + m->currentBinLabels[it->first] + "\"=" + m->currentBinLabels[it->first] + ","; + tempOutput += "\"" + m->currentSharedBinLabels[it->first] + "\"=" + m->currentSharedBinLabels[it->first] + ","; } //tempOutput = tempOutput.substr(0, tempOutput.length()-1); tempOutput += " class=treatments"; @@ -1109,7 +1109,7 @@ bool LefseCommand::printToCoutForRTesting(vector< vector >& adjustedLook for (map::iterator it = bins.begin(); it != bins.end(); it++) { if (m->control_pressed) { break; } - tempOutput += m->currentBinLabels[it->first] + "+"; + tempOutput += m->currentSharedBinLabels[it->first] + "+"; } tempOutput = tempOutput.substr(0, tempOutput.length()-1); //rip off extra plus sign tempOutput += "), data = dat, tol = 1e-10))"; @@ -1168,7 +1168,7 @@ int LefseCommand::makeShared(int numDesignLines) { lookup.push_back(temp); } - m->currentBinLabels.clear(); + m->currentSharedBinLabels.clear(); int count = 0; while (!in.eof()) { if (m->control_pressed) { return 0; } @@ -1189,7 +1189,7 @@ int LefseCommand::makeShared(int numDesignLines) { lookup[i-1]->push_back(value, toString(i-1)); //cout << pieces[i] << '\t'; } - m->currentBinLabels.push_back(toString(count)); + m->currentSharedBinLabels.push_back(toString(count)); //m->currentBinLabels.push_back(pieces[0]); //cout << line<< endl; //cout << endl; diff --git a/listotulabelscommand.cpp b/listotulabelscommand.cpp index 037c822..512dd6d 100644 --- a/listotulabelscommand.cpp +++ b/listotulabelscommand.cpp @@ -461,7 +461,7 @@ int ListOtuLabelsCommand::createList(vector& lookup){ ofstream out; m->openOutputFile(outputFileName, out); - for (int i = 0; i < m->currentBinLabels.size(); i++) { out << m->currentBinLabels[i] << endl; } + for (int i = 0; i < m->currentSharedBinLabels.size(); i++) { out << m->currentSharedBinLabels[i] << endl; } out.close(); @@ -485,7 +485,7 @@ int ListOtuLabelsCommand::createList(vector& lookup){ ofstream out; m->openOutputFile(outputFileName, out); - for (int i = 0; i < m->currentBinLabels.size(); i++) { out << m->currentBinLabels[i] << endl; } + for (int i = 0; i < m->currentSharedBinLabels.size(); i++) { out << m->currentSharedBinLabels[i] << endl; } out.close(); @@ -507,20 +507,8 @@ int ListOtuLabelsCommand::createList(ListVector*& list){ ofstream out; m->openOutputFile(outputFileName, out); - string snumBins = toString(list->getNumBins()); - for (int i = 0; i < list->getNumBins(); i++) { - if (m->control_pressed) { break; } - - string otuLabel = "Otu"; - string sbinNumber = toString(i+1); - if (sbinNumber.length() < snumBins.length()) { - int diff = snumBins.length() - sbinNumber.length(); - for (int h = 0; h < diff; h++) { otuLabel += "0"; } - } - otuLabel += sbinNumber; - - out << otuLabel << endl; - } + vector binLabels = list->getLabels(); + for (int i = 0; i < binLabels.size(); i++) { out << binLabels[i] << endl; } out.close(); diff --git a/listvector.cpp b/listvector.cpp index 2758c94..90df802 100644 --- a/listvector.cpp +++ b/listvector.cpp @@ -67,8 +67,59 @@ ListVector::ListVector(string id, vector lv) : DataVector(id), data(lv){ ListVector::ListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) { try { int hold; - f >> label >> hold; + + //are we at the beginning of the file?? + if (m->saveNextLabel == "") { + f >> label; + + //is this a shared file that has headers + if (label == "label") { + + //gets "numOtus" + f >> label; m->gobble(f); + + //eat rest of line + label = m->getline(f); m->gobble(f); + + //parse labels to save + istringstream iStringStream(label); + m->listBinLabelsInFile.clear(); + while(!iStringStream.eof()){ + if (m->control_pressed) { break; } + string temp; + iStringStream >> temp; m->gobble(iStringStream); + + m->listBinLabelsInFile.push_back(temp); + } + + f >> label >> hold; + }else { + //read in first row + f >> hold; + + //make binlabels because we don't have any + string snumBins = toString(hold); + m->listBinLabelsInFile.clear(); + for (int i = 0; i < hold; i++) { + //if there is a bin label use it otherwise make one + string binLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + m->listBinLabelsInFile.push_back(binLabel); + } + } + m->saveNextLabel = label; + }else { + f >> label >> hold; + m->saveNextLabel = label; + } + binLabels.assign(m->listBinLabelsInFile.begin(), m->listBinLabelsInFile.begin()+hold); + data.assign(hold, ""); string inputData = ""; @@ -77,6 +128,8 @@ ListVector::ListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numS set(i, inputData); } m->gobble(f); + + if (f.eof()) { m->saveNextLabel = ""; } } catch(exception& e) { m->errorOut(e, "ListVector", "ListVector"); @@ -109,6 +162,60 @@ void ListVector::set(int binNumber, string seqNames){ string ListVector::get(int index){ return data[index]; } +/***********************************************************************/ + +void ListVector::setLabels(vector labels){ + try { + binLabels = labels; + } + catch(exception& e) { + m->errorOut(e, "ListVector", "setLabels"); + exit(1); + } +} + +/***********************************************************************/ +//could potentially end up with duplicate binlabel names with code below. +//we don't currently use them in a way that would do that. +//if you had a listfile that had been subsampled and then added to it, dup names would be possible. +vector ListVector::getLabels(){ + try { + + string tagHeader = "Otu"; + if (m->sharedHeaderMode == "tax") { tagHeader = "PhyloType"; } + + if (binLabels.size() < data.size()) { + string snumBins = toString(numBins); + + for (int i = 0; i < numBins; i++) { + string binLabel = tagHeader; + + if (i < binLabels.size()) { //label exists, check leading zeros length + string sbinNumber = m->getSimpleLabel(binLabels[i]); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + binLabels[i] = binLabel; + }else{ + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + binLabels.push_back(binLabel); + } + } + } + return binLabels; + } + catch(exception& e) { + m->errorOut(e, "ListVector", "getLabels"); + exit(1); + } +} /***********************************************************************/ @@ -150,6 +257,52 @@ void ListVector::clear(){ } +/***********************************************************************/ +void ListVector::printHeaders(ostream& output){ + try { + string snumBins = toString(numBins); + output << "label\tnumOtus\t"; + if (m->sharedHeaderMode == "tax") { + for (int i = 0; i < numBins; i++) { + + //if there is a bin label use it otherwise make one + string binLabel = "PhyloType"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + if (i < binLabels.size()) { binLabel = binLabels[i]; } + + output << binLabel << '\t'; + } + output << endl; + }else { + for (int i = 0; i < numBins; i++) { + //if there is a bin label use it otherwise make one + string binLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + if (i < binLabels.size()) { binLabel = binLabels[i]; } + + output << binLabel << '\t'; + } + + output << endl; + } + m->printedListHeaders = true; + } + catch(exception& e) { + m->errorOut(e, "ListVector", "printHeaders"); + exit(1); + } +} + /***********************************************************************/ void ListVector::print(ostream& output){ diff --git a/listvector.hpp b/listvector.hpp index dcf01a0..b2bc511 100644 --- a/listvector.hpp +++ b/listvector.hpp @@ -20,7 +20,7 @@ public: ListVector(int); // ListVector(const ListVector&); ListVector(string, vector); - ListVector(const ListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){}; + ListVector(const ListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs), binLabels(lv.binLabels) {}; ListVector(ifstream&); ~ListVector(){}; @@ -30,11 +30,14 @@ public: void set(int, string); string get(int); + vector getLabels(); + void setLabels(vector); void push_back(string); void resize(int); void clear(); int size(); void print(ostream&); + void printHeaders(ostream&); RAbundVector getRAbundVector(); SAbundVector getSAbundVector(); @@ -45,6 +48,7 @@ private: int maxRank; int numBins; int numSeqs; + vector binLabels; }; diff --git a/makebiomcommand.cpp b/makebiomcommand.cpp index 248b3c8..0d289d7 100644 --- a/makebiomcommand.cpp +++ b/makebiomcommand.cpp @@ -447,10 +447,10 @@ int MakeBiomCommand::getBiom(vector& lookup){ string rowBack = "\", \"metadata\":"; for (int i = 0; i < numBins-1; i++) { if (m->control_pressed) { out.close(); return 0; } - if (!picrust) { out << rowFront << m->currentBinLabels[i] << rowBack << metadata[i] << "},\n"; } + if (!picrust) { out << rowFront << m->currentSharedBinLabels[i] << rowBack << metadata[i] << "},\n"; } else { out << rowFront << picrustLabels[i] << rowBack << metadata[i] << "},\n"; } } - if (!picrust) { out << rowFront << m->currentBinLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; } + if (!picrust) { out << rowFront << m->currentSharedBinLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; } else { out << rowFront << picrustLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; } //get column info /*"columns": [ @@ -574,7 +574,7 @@ vector MakeBiomCommand::getMetaData(vector& lookup, in.close(); //should the labels be Otu001 or PhyloType001 - string firstBin = m->currentBinLabels[0]; + string firstBin = m->currentSharedBinLabels[0]; string binTag = "Otu"; if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType"; } @@ -611,9 +611,9 @@ vector MakeBiomCommand::getMetaData(vector& lookup, if (m->control_pressed) { return metadata; } - it = labelTaxMap.find(m->getSimpleLabel(m->currentBinLabels[i])); + it = labelTaxMap.find(m->getSimpleLabel(m->currentSharedBinLabels[i])); - if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + m->currentBinLabels[i] + ".\n"); m->control_pressed = true; } + if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + m->currentSharedBinLabels[i] + ".\n"); m->control_pressed = true; } else { if (picrust) { string temp = it->second; m->removeConfidences(temp); diff --git a/makelefsecommand.cpp b/makelefsecommand.cpp index 73f9db2..c859bfb 100644 --- a/makelefsecommand.cpp +++ b/makelefsecommand.cpp @@ -289,7 +289,7 @@ int MakeLefseCommand::runRelabund(map& consTax, vectorgetNumBins(); i++) { //process each otu if (m->control_pressed) { break; } - string nameOfOtu = m->currentBinLabels[i]; + string nameOfOtu = m->currentSharedBinLabels[i]; if (constaxonomyfile != "") { //try to find the otuName in consTax to replace with consensus taxonomy map::iterator it = consTax.find(nameOfOtu); if (it != consTax.end()) { @@ -299,7 +299,7 @@ int MakeLefseCommand::runRelabund(map& consTax, vectorremoveConfidences(nameOfOtu); for (int j = 0; j < nameOfOtu.length()-1; j++) { - if (nameOfOtu[j] == ';') { fixedName += "_" + m->currentBinLabels[i] + '|'; } + if (nameOfOtu[j] == ';') { fixedName += "_" + m->currentSharedBinLabels[i] + '|'; } else { fixedName += nameOfOtu[j]; } } nameOfOtu = fixedName; diff --git a/mergegroupscommand.cpp b/mergegroupscommand.cpp index bfd1b84..0ddc3d4 100644 --- a/mergegroupscommand.cpp +++ b/mergegroupscommand.cpp @@ -327,7 +327,7 @@ int MergeGroupsCommand::processSharedFile(GroupMap*& designMap){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } + if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); } process(lookup, out); processedLabels.insert(lookup[0]->getLabel()); @@ -341,7 +341,7 @@ int MergeGroupsCommand::processSharedFile(GroupMap*& designMap){ lookup = input.getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } + if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); } process(lookup, out); processedLabels.insert(lookup[0]->getLabel()); @@ -383,7 +383,7 @@ int MergeGroupsCommand::processSharedFile(GroupMap*& designMap){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } + if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); } process(lookup, out); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } diff --git a/metastatscommand.cpp b/metastatscommand.cpp index 204d83e..33b559f 100644 --- a/metastatscommand.cpp +++ b/metastatscommand.cpp @@ -646,7 +646,7 @@ int MetaStatsCommand::convertToInput(vector& subset, string out << subset[subset.size()-1]->getGroup() << endl; for (int i = 0; i < subset[0]->getNumBins(); i++) { - out << m->currentBinLabels[i] << '\t'; + out << m->currentSharedBinLabels[i] << '\t'; for (int j = 0; j < subset.size()-1; j++) { out << subset[j]->getAbundance(i) << '\t'; } diff --git a/mgclustercommand.cpp b/mgclustercommand.cpp index 85db5e0..a207925 100644 --- a/mgclustercommand.cpp +++ b/mgclustercommand.cpp @@ -295,6 +295,7 @@ int MGClusterCommand::execute(){ m->openOutputFile(rabundFileName, rabundFile); } m->openOutputFile(listFileName, listFile); + list->printHeaders(listFile); if (m->control_pressed) { delete nameMap; delete read; delete list; delete rabund; diff --git a/mothurmetastats.cpp b/mothurmetastats.cpp index 4715de9..f6a7800 100644 --- a/mothurmetastats.cpp +++ b/mothurmetastats.cpp @@ -229,7 +229,7 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector if (m->control_pressed) { out.close(); return 0; } //if there are binlabels use them otherwise count. - if (m->binLabelsInFile.size() == row) { out << m->binLabelsInFile[i] << '\t'; } + if (i < m->currentSharedBinLabels.size()) { out << m->currentSharedBinLabels[i] << '\t'; } else { out << (i+1) << '\t'; } out << C1[i][0] << '\t' << C1[i][1] << '\t' << C1[i][2] << '\t' << C2[i][0] << '\t' << C2[i][1] << '\t' << C2[i][2] << '\t' << pvalues[i] << '\t' << qvalues[i] << endl; diff --git a/mothurout.h b/mothurout.h index f58f4e8..363a7a8 100644 --- a/mothurout.h +++ b/mothurout.h @@ -67,10 +67,12 @@ class MothurOut { vector getAllGroups() { sort(namesOfGroups.begin(), namesOfGroups.end()); return namesOfGroups; } vector Treenames; //map names; - vector binLabelsInFile; - vector currentBinLabels; + vector sharedBinLabelsInFile; + vector currentSharedBinLabels; + vector listBinLabelsInFile; + //vector currentListBinLabels; string saveNextLabel, argv, sharedHeaderMode, groupMode; - bool printedHeaders, commandInputsConvertError, changedSeqNames, modifyNames; + bool printedSharedHeaders, printedListHeaders, commandInputsConvertError, changedSeqNames, modifyNames; //functions from mothur.h //file operations @@ -270,7 +272,8 @@ class MothurOut { counttablefile = ""; summaryfile = ""; gui = false; - printedHeaders = false; + printedSharedHeaders = false; + printedListHeaders = false; commandInputsConvertError = false; mothurCalling = false; debug = false; diff --git a/normalizesharedcommand.cpp b/normalizesharedcommand.cpp index acd8208..1000e42 100644 --- a/normalizesharedcommand.cpp +++ b/normalizesharedcommand.cpp @@ -458,7 +458,7 @@ int NormalizeSharedCommand::execute(){ int NormalizeSharedCommand::normalize(vector& thisLookUp){ try { //save mothurOut's binLabels to restore for next label - vector saveBinLabels = m->currentBinLabels; + vector saveBinLabels = m->currentSharedBinLabels; if (pickedGroups) { eliminateZeroOTUS(thisLookUp); } @@ -540,7 +540,7 @@ int NormalizeSharedCommand::normalize(vector& thisLookUp){ out.close(); - m->currentBinLabels = saveBinLabels; + m->currentSharedBinLabels = saveBinLabels; return 0; } @@ -555,7 +555,7 @@ int NormalizeSharedCommand::normalize(vector& thisLook try { //save mothurOut's binLabels to restore for next label - vector saveBinLabels = m->currentBinLabels; + vector saveBinLabels = m->currentSharedBinLabels; map variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile)); @@ -629,7 +629,7 @@ int NormalizeSharedCommand::normalize(vector& thisLook out.close(); - m->currentBinLabels = saveBinLabels; + m->currentSharedBinLabels = saveBinLabels; return 0; } @@ -675,7 +675,7 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& thisl for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; - if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; } newBinLabels.push_back(binLabel); } @@ -684,7 +684,7 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& thisl for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; - m->currentBinLabels = newBinLabels; + m->currentSharedBinLabels = newBinLabels; return 0; @@ -731,7 +731,7 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; - if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; } newBinLabels.push_back(binLabel); } @@ -740,7 +740,7 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector& for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; - m->currentBinLabels = newBinLabels; + m->currentSharedBinLabels = newBinLabels; return 0; diff --git a/otuassociationcommand.cpp b/otuassociationcommand.cpp index 9ccdeea..705540c 100644 --- a/otuassociationcommand.cpp +++ b/otuassociationcommand.cpp @@ -360,7 +360,7 @@ int OTUAssociationCommand::process(vector& lookup){ else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); } else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; } - if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl; } + if (sig < cutoff) { out << m->currentSharedBinLabels[i] << '\t' << m->currentSharedBinLabels[k] << '\t' << coef << '\t' << sig << endl; } } } }else { //compare otus to metadata @@ -377,7 +377,7 @@ int OTUAssociationCommand::process(vector& lookup){ else if (method == "kendall") { coef = linear.calcKendall(xy[i], metadata[k], sig); } else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; } - if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; } + if (sig < cutoff) { out << m->currentSharedBinLabels[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; } } } @@ -523,7 +523,7 @@ int OTUAssociationCommand::process(vector& lookup){ else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); } else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; } - if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl; } + if (sig < cutoff) { out << m->currentSharedBinLabels[i] << '\t' << m->currentSharedBinLabels[k] << '\t' << coef << '\t' << sig << endl; } } } }else { //compare otus to metadata @@ -540,7 +540,7 @@ int OTUAssociationCommand::process(vector& lookup){ else if (method == "kendall") { coef = linear.calcKendall(xy[i], metadata[k], sig); } else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; } - if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; } + if (sig < cutoff) { out << m->currentSharedBinLabels[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; } } } diff --git a/otuhierarchycommand.cpp b/otuhierarchycommand.cpp index a294a77..07e3c1d 100644 --- a/otuhierarchycommand.cpp +++ b/otuhierarchycommand.cpp @@ -175,11 +175,11 @@ int OtuHierarchyCommand::execute(){ } //map sequences to bin number in the "little" otu - map littleBins; + map littleBins; + vector binLabels0 = lists[0].getLabels(); for (int i = 0; i < lists[0].getNumBins(); i++) { if (m->control_pressed) { return 0; } - string bin = lists[0].get(i); vector names; m->splitAtComma(bin, names); for (int j = 0; j < names.size(); j++) { littleBins[names[j]] = i; } @@ -195,17 +195,17 @@ int OtuHierarchyCommand::execute(){ m->openOutputFile(outputFileName, out); //go through each bin in "big" otu and output the bins in "little" otu which created it + vector binLabels1 = lists[1].getLabels(); for (int i = 0; i < lists[1].getNumBins(); i++) { if (m->control_pressed) { outputTypes.clear(); out.close(); m->mothurRemove(outputFileName); return 0; } string binnames = lists[1].get(i); vector names; m->splitAtComma(binnames, names); - //output column 1 if (output == "name") { out << binnames << '\t'; } - else { out << (i+1) << '\t'; } + else { out << binLabels1[i] << '\t'; } map bins; //bin numbers in little that are in this bin in big map::iterator it; @@ -216,7 +216,7 @@ int OtuHierarchyCommand::execute(){ string col2 = ""; for (it = bins.begin(); it != bins.end(); it++) { if (output == "name") { col2 += lists[0].get(it->first) + "\t"; } - else { col2 += toString(it->first) + "\t"; } + else { col2 += binLabels0[it->first] + "\t"; } } //output column 2 diff --git a/randomforest.cpp b/randomforest.cpp index d998de6..acf87df 100644 --- a/randomforest.cpp +++ b/randomforest.cpp @@ -180,7 +180,7 @@ int RandomForest::calcForrestVariableImportance(string filename) { m->openOutputFile(filename, out); out <<"OTU\tMean decrease accuracy\n"; for (int i = 0; i < globalVariableRanks.size(); i++) { - out << m->currentBinLabels[(int)globalVariableRanks[i].first] << '\t' << globalVariableImportanceList[globalVariableRanks[i].first] << endl; + out << m->currentSharedBinLabels[(int)globalVariableRanks[i].first] << '\t' << globalVariableImportanceList[globalVariableRanks[i].first] << endl; } out.close(); return 0; diff --git a/removegroupscommand.cpp b/removegroupscommand.cpp index 9eefa43..9db35cb 100644 --- a/removegroupscommand.cpp +++ b/removegroupscommand.cpp @@ -560,9 +560,9 @@ int RemoveGroupsCommand::readShared(){ m->setGroups(groupsToKeep); m->clearAllGroups(); m->saveNextLabel = ""; - m->printedHeaders = false; - m->currentBinLabels.clear(); - m->binLabelsInFile.clear(); + m->printedSharedHeaders = false; + m->currentSharedBinLabels.clear(); + m->sharedBinLabelsInFile.clear(); InputData input(sharedfile, "sharedfile"); lookup = input.getSharedRAbundVectors(); diff --git a/removelineagecommand.cpp b/removelineagecommand.cpp index 01f45ca..761de14 100644 --- a/removelineagecommand.cpp +++ b/removelineagecommand.cpp @@ -843,9 +843,9 @@ int RemoveLineageCommand::readShared(){ if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; } //is this otu on the list - if (names.count(m->getSimpleLabel(m->currentBinLabels[i])) == 0) { + if (names.count(m->getSimpleLabel(m->currentSharedBinLabels[i])) == 0) { wroteSomething = true; - newLabels.push_back(m->currentBinLabels[i]); + newLabels.push_back(m->currentSharedBinLabels[i]); for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup()); } @@ -865,7 +865,7 @@ int RemoveLineageCommand::readShared(){ for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } - m->currentBinLabels = newLabels; + m->currentSharedBinLabels = newLabels; newLookup[0]->printHeaders(out); diff --git a/removeotulabelscommand.cpp b/removeotulabelscommand.cpp index 161c4bf..a882f6b 100644 --- a/removeotulabelscommand.cpp +++ b/removeotulabelscommand.cpp @@ -462,9 +462,9 @@ int RemoveOtuLabelsCommand::readShared(){ if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; } //is this otu on the list - if (labels.count(m->getSimpleLabel(m->currentBinLabels[i])) == 0) { + if (labels.count(m->getSimpleLabel(m->currentSharedBinLabels[i])) == 0) { wroteSomething = true; - newLabels.push_back(m->currentBinLabels[i]); + newLabels.push_back(m->currentSharedBinLabels[i]); for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup()); } @@ -484,7 +484,7 @@ int RemoveOtuLabelsCommand::readShared(){ for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } - m->currentBinLabels = newLabels; + m->currentSharedBinLabels = newLabels; newLookup[0]->printHeaders(out); diff --git a/removerarecommand.cpp b/removerarecommand.cpp index 2eb5f63..c378e41 100644 --- a/removerarecommand.cpp +++ b/removerarecommand.cpp @@ -850,7 +850,7 @@ int RemoveRareCommand::processLookup(vector& lookup){ //eliminates zero otus if (allZero) { for (int j = 0; j < newRabunds.size(); j++) { newRabunds[j].pop_back(); } } - else { headers.push_back(m->currentBinLabels[i]); } + else { headers.push_back(m->currentSharedBinLabels[i]); } } }else { //for each otu @@ -867,7 +867,7 @@ int RemoveRareCommand::processLookup(vector& lookup){ //eliminates otus below rare cutoff if (totalAbund <= nseqs) { for (int j = 0; j < newRabunds.size(); j++) { newRabunds[j].pop_back(); } } - else { headers.push_back(m->currentBinLabels[i]); } + else { headers.push_back(m->currentSharedBinLabels[i]); } } } diff --git a/sharedcommand.cpp b/sharedcommand.cpp index 1e078b3..3be9bb1 100644 --- a/sharedcommand.cpp +++ b/sharedcommand.cpp @@ -60,7 +60,7 @@ string SharedCommand::getOutputPattern(string type) { try { string pattern = ""; - if (type == "shared") { pattern = "[filename],shared"; } + if (type == "shared") { pattern = "[filename],shared-[filename],[distance],shared"; } else if (type == "rabund") { pattern = "[filename],[group],rabund"; } else if (type == "group") { pattern = "[filename],[group],groups"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } @@ -249,21 +249,9 @@ int SharedCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } - - //getting output filename - string filename = ""; - if (listfile != "") { filename = listfile; } - else { filename = biomfile; } - - if (outputDir == "") { outputDir += m->hasPath(filename); } - - map variables; - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename)); - filename = getOutputFileName("shared",variables); - outputNames.push_back(filename); outputTypes["shared"].push_back(filename); - - if (listfile != "") { createSharedFromListGroup(filename); } - else { createSharedFromBiom(filename); } + + if (listfile != "") { createSharedFromListGroup(); } + else { createSharedFromBiom(); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } } @@ -297,8 +285,17 @@ int SharedCommand::execute(){ } } //********************************************************************************************************************** -int SharedCommand::createSharedFromBiom(string filename) { +int SharedCommand::createSharedFromBiom() { try { + //getting output filename + string filename = biomfile; + if (outputDir == "") { outputDir += m->hasPath(filename); } + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename)); + filename = getOutputFileName("shared",variables); + outputNames.push_back(filename); outputTypes["shared"].push_back(filename); + ofstream out; m->openOutputFile(filename, out); @@ -458,7 +455,7 @@ int SharedCommand::createSharedFromBiom(string filename) { if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); } else { string thisLine = it->second; - m->currentBinLabels = otuNames; + m->currentSharedBinLabels = otuNames; //read data vector lookup = readData(matrixFormat, thisLine, matrixElementType, groupNames, otuNames.size()); @@ -611,7 +608,7 @@ int SharedCommand::eliminateZeroOTUS(vector& thislookup) { for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; - if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; } newBinLabels.push_back(binLabel); } @@ -620,7 +617,7 @@ int SharedCommand::eliminateZeroOTUS(vector& thislookup) { for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; - m->currentBinLabels = newBinLabels; + m->currentSharedBinLabels = newBinLabels; return 0; @@ -750,10 +747,8 @@ string SharedCommand::getTag(string& line) { } } //********************************************************************************************************************** -int SharedCommand::createSharedFromListGroup(string filename) { +int SharedCommand::createSharedFromListGroup() { try { - ofstream out; - m->openOutputFile(filename, out); GroupMap* groupMap = NULL; CountTable* countTable = NULL; @@ -780,6 +775,20 @@ int SharedCommand::createSharedFromListGroup(string filename) { m->setGroups(Groups); }else { pickedGroups = true; } + + ofstream out; + string filename = ""; + if (!pickedGroups) { + string filename = listfile; + if (outputDir == "") { outputDir += m->hasPath(filename); } + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename)); + filename = getOutputFileName("shared",variables); + outputNames.push_back(filename); outputTypes["shared"].push_back(filename); + m->openOutputFile(filename, out); + } + //fill filehandles with neccessary ofstreams int i; ofstream* temp; @@ -814,7 +823,7 @@ int SharedCommand::createSharedFromListGroup(string filename) { if (m->control_pressed) { delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } - out.close(); m->mothurRemove(filename); + out.close(); if (!pickedGroups) { m->mothurRemove(filename); } for (int i=0; imothurRemove(rabundFIleName); } @@ -831,7 +840,7 @@ int SharedCommand::createSharedFromListGroup(string filename) { if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) { //if the user has not specified any groups and their files don't match exit with error m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; - out.close(); m->mothurRemove(filename); //remove blank shared file you made + out.close(); if (!pickedGroups) { m->mothurRemove(filename); } //remove blank shared file you made //delete memory for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } @@ -845,9 +854,10 @@ int SharedCommand::createSharedFromListGroup(string filename) { if ((pickedGroups) && (m->groupMode == "group")) { //make new group file string groups = ""; if (m->getNumGroups() < 4) { - for (int i = 0; i < m->getNumGroups(); i++) { + for (int i = 0; i < m->getNumGroups()-1; i++) { groups += (m->getGroups())[i] + "."; } + groups+=(m->getGroups())[m->getNumGroups()-1]; }else { groups = "merge"; } map variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile)); @@ -877,7 +887,7 @@ int SharedCommand::createSharedFromListGroup(string filename) { if (m->control_pressed) { delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } - out.close(); m->mothurRemove(filename); + if (!pickedGroups) { out.close(); m->mothurRemove(filename); } for (int i=0; imothurRemove(rabundFIleName); } @@ -889,23 +899,42 @@ int SharedCommand::createSharedFromListGroup(string filename) { lookup = SharedList->getSharedRAbundVector(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (pickedGroups) { //check for otus with no seqs in them - eliminateZeroOTUS(lookup); - } if (m->control_pressed) { delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } - out.close(); m->mothurRemove(filename); + if (!pickedGroups) { out.close(); m->mothurRemove(filename); } for (int i=0; imothurRemove(rabundFIleName); } return 0; } - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } - printSharedData(lookup, out); //prints info to the .shared file + //if picked groups must split the shared file by label + if (pickedGroups) { + string filename = listfile; + if (outputDir == "") { outputDir += m->hasPath(filename); } + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename)); + variables["[distance]"] = lookup[0]->getLabel(); + filename = getOutputFileName("shared",variables); + outputNames.push_back(filename); outputTypes["shared"].push_back(filename); + ofstream out2; + m->openOutputFile(filename, out2); + + vector savedLabels = m->currentSharedBinLabels; + eliminateZeroOTUS(lookup); + lookup[0]->printHeaders(out2); + printSharedData(lookup, out2); + out2.close(); + m->currentSharedBinLabels = savedLabels; //restore old labels + + }else { + if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); } + printSharedData(lookup, out); //prints info to the .shared file + } for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } processedLabels.insert(SharedList->getLabel()); @@ -920,24 +949,43 @@ int SharedCommand::createSharedFromListGroup(string filename) { lookup = SharedList->getSharedRAbundVector(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (pickedGroups) { //check for otus with no seqs in them - eliminateZeroOTUS(lookup); - } - if (m->control_pressed) { delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } - out.close(); m->mothurRemove(filename); + if (!pickedGroups) { out.close(); m->mothurRemove(filename); } for (int i=0; imothurRemove(rabundFIleName); } return 0; } - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } - printSharedData(lookup, out); //prints info to the .shared file + //if picked groups must split the shared file by label + if (pickedGroups) { + string filename = listfile; + if (outputDir == "") { outputDir += m->hasPath(filename); } + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename)); + variables["[distance]"] = lookup[0]->getLabel(); + filename = getOutputFileName("shared",variables); + outputNames.push_back(filename); outputTypes["shared"].push_back(filename); + ofstream out2; + m->openOutputFile(filename, out2); + + vector savedLabels = m->currentSharedBinLabels; + eliminateZeroOTUS(lookup); + lookup[0]->printHeaders(out2); + printSharedData(lookup, out2); + out2.close(); + m->currentSharedBinLabels = savedLabels; //restore old labels + + }else { + if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); } + printSharedData(lookup, out); //prints info to the .shared file + } + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } processedLabels.insert(SharedList->getLabel()); @@ -970,27 +1018,46 @@ int SharedCommand::createSharedFromListGroup(string filename) { lookup = SharedList->getSharedRAbundVector(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - if (pickedGroups) { //check for otus with no seqs in them - eliminateZeroOTUS(lookup); - } if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } - out.close(); m->mothurRemove(filename); + if (!pickedGroups) { out.close(); m->mothurRemove(filename); } for (int i=0; imothurRemove(rabundFIleName); } return 0; } - if (!m->printedHeaders) { lookup[0]->printHeaders(out); } - printSharedData(lookup, out); //prints info to the .shared file + //if picked groups must split the shared file by label + if (pickedGroups) { + string filename = listfile; + if (outputDir == "") { outputDir += m->hasPath(filename); } + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename)); + variables["[distance]"] = lookup[0]->getLabel(); + filename = getOutputFileName("shared",variables); + outputNames.push_back(filename); outputTypes["shared"].push_back(filename); + ofstream out2; + m->openOutputFile(filename, out2); + + vector savedLabels = m->currentSharedBinLabels; + eliminateZeroOTUS(lookup); + lookup[0]->printHeaders(out2); + printSharedData(lookup, out2); + out2.close(); + m->currentSharedBinLabels = savedLabels; //restore old labels + + }else { + if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); } + printSharedData(lookup, out); //prints info to the .shared file + } for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } delete SharedList; } - out.close(); + if (!pickedGroups) { out.close(); } for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; @@ -999,7 +1066,7 @@ int SharedCommand::createSharedFromListGroup(string filename) { if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } if (m->control_pressed) { - m->mothurRemove(filename); + if (!pickedGroups) { m->mothurRemove(filename); } for (int i=0; imothurRemove(rabundFIleName); } diff --git a/sharedcommand.h b/sharedcommand.h index 39ef9a7..a916c85 100644 --- a/sharedcommand.h +++ b/sharedcommand.h @@ -45,8 +45,8 @@ private: bool isValidGroup(string, vector); int eliminateZeroOTUS(vector&); int ListGroupSameSeqs(vector&, SharedListVector*); - int createSharedFromListGroup(string); - int createSharedFromBiom(string); + int createSharedFromListGroup(); + int createSharedFromBiom(); string getTag(string&); vector readRows(string, int&); int getDims(string, int&, int&); diff --git a/sharedlistvector.cpp b/sharedlistvector.cpp index 2ce250b..8fabc5b 100644 --- a/sharedlistvector.cpp +++ b/sharedlistvector.cpp @@ -35,16 +35,70 @@ SharedListVector::SharedListVector(ifstream& f) : DataVector(), maxRank(0), numB countTable->readTable(m->getCountTableFile(), true, false); } - int hold; - string inputData; - f >> label >> hold; - - data.assign(hold, ""); + int hold; + + //are we at the beginning of the file?? + if (m->saveNextLabel == "") { + f >> label; + + //is this a shared file that has headers + if (label == "label") { + + //gets "numOtus" + f >> label; m->gobble(f); + + //eat rest of line + label = m->getline(f); m->gobble(f); + + //parse labels to save + istringstream iStringStream(label); + m->listBinLabelsInFile.clear(); + while(!iStringStream.eof()){ + if (m->control_pressed) { break; } + string temp; + iStringStream >> temp; m->gobble(iStringStream); + + m->listBinLabelsInFile.push_back(temp); + } + + f >> label >> hold; + }else { + //read in first row + f >> hold; + + //make binlabels because we don't have any + string snumBins = toString(hold); + m->listBinLabelsInFile.clear(); + for (int i = 0; i < hold; i++) { + //if there is a bin label use it otherwise make one + string binLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + m->listBinLabelsInFile.push_back(binLabel); + } + } + m->saveNextLabel = label; + }else { + f >> label >> hold; + m->saveNextLabel = label; + } + + binLabels.assign(m->listBinLabelsInFile.begin(), m->listBinLabelsInFile.begin()+hold); + data.assign(hold, ""); + string inputData = ""; + for(int i=0;i> inputData; set(i, inputData); } + m->gobble(f); + + if (f.eof()) { m->saveNextLabel = ""; } } catch(exception& e) { @@ -79,7 +133,59 @@ void SharedListVector::set(int binNumber, string seqNames){ string SharedListVector::get(int index){ return data[index]; } +/***********************************************************************/ + +void SharedListVector::setLabels(vector labels){ + try { + binLabels = labels; + } + catch(exception& e) { + m->errorOut(e, "SharedListVector", "setLabels"); + exit(1); + } +} +/***********************************************************************/ +//could potentially end up with duplicate binlabel names with code below. +//we don't currently use them in a way that would do that. +//if you had a listfile that had been subsampled and then added to it, dup names would be possible. +vector SharedListVector::getLabels(){ + try { + string tagHeader = "Otu"; + if (m->sharedHeaderMode == "tax") { tagHeader = "PhyloType"; } + + if (binLabels.size() < data.size()) { + string snumBins = toString(numBins); + + for (int i = 0; i < numBins; i++) { + string binLabel = tagHeader; + + if (i < binLabels.size()) { //label exists, check leading zeros length + string sbinNumber = m->getSimpleLabel(binLabels[i]); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + binLabels[i] = binLabel; + }else{ + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + binLabels.push_back(binLabel); + } + } + } + return binLabels; + } + catch(exception& e) { + m->errorOut(e, "SharedListVector", "getLabels"); + exit(1); + } +} /***********************************************************************/ void SharedListVector::push_back(string seqNames){ @@ -237,6 +343,8 @@ SharedOrderVector* SharedListVector::getSharedOrderVector(){ /***********************************************************************/ SharedRAbundVector SharedListVector::getSharedRAbundVector(string groupName) { try { + m->currentSharedBinLabels = binLabels; + SharedRAbundVector rav(data.size()); for(int i=0;i SharedListVector::getSharedRAbundVector() { try { + m->currentSharedBinLabels = binLabels; + SharedUtil* util; util = new SharedUtil(); vector lookup; //contains just the groups the user selected diff --git a/sharedlistvector.h b/sharedlistvector.h index 8177925..13fed97 100644 --- a/sharedlistvector.h +++ b/sharedlistvector.h @@ -33,7 +33,7 @@ public: SharedListVector(); SharedListVector(int); SharedListVector(ifstream&); - SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){ groupmap = NULL; countTable = NULL; }; + SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs), binLabels(lv.binLabels) { groupmap = NULL; countTable = NULL; }; ~SharedListVector(){ if (groupmap != NULL) { delete groupmap; } if (countTable != NULL) { delete countTable; } }; int getNumBins() { return numBins; } @@ -42,6 +42,8 @@ public: void set(int, string); string get(int); + vector getLabels(); + void setLabels(vector); void push_back(string); void resize(int); void clear(); @@ -63,6 +65,7 @@ private: int maxRank; int numBins; int numSeqs; + vector binLabels; }; diff --git a/sharedordervector.cpp b/sharedordervector.cpp index 5231b3c..80b8fb9 100644 --- a/sharedordervector.cpp +++ b/sharedordervector.cpp @@ -55,13 +55,13 @@ SharedOrderVector::SharedOrderVector(ifstream& f) : DataVector() { //reads in a //parse labels to save istringstream iStringStream(label); - m->binLabelsInFile.clear(); + m->sharedBinLabelsInFile.clear(); while(!iStringStream.eof()){ if (m->control_pressed) { break; } string temp; iStringStream >> temp; m->gobble(iStringStream); - m->binLabelsInFile.push_back(temp); + m->sharedBinLabelsInFile.push_back(temp); } f >> label; @@ -69,7 +69,7 @@ SharedOrderVector::SharedOrderVector(ifstream& f) : DataVector() { //reads in a }else { label = m->saveNextLabel; } //reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling - m->currentBinLabels = m->binLabelsInFile; + m->currentSharedBinLabels = m->sharedBinLabelsInFile; //read in first row since you know there is at least 1 group. f >> groupN >> num; diff --git a/sharedrabundfloatvector.cpp b/sharedrabundfloatvector.cpp index 71c868b..9eeb0f2 100644 --- a/sharedrabundfloatvector.cpp +++ b/sharedrabundfloatvector.cpp @@ -62,13 +62,13 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), ma //parse labels to save istringstream iStringStream(label); - m->binLabelsInFile.clear(); + m->sharedBinLabelsInFile.clear(); while(!iStringStream.eof()){ if (m->control_pressed) { break; } string temp; iStringStream >> temp; m->gobble(iStringStream); - m->binLabelsInFile.push_back(temp); + m->sharedBinLabelsInFile.push_back(temp); } f >> label >> groupN >> num; @@ -78,7 +78,7 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), ma //make binlabels because we don't have any string snumBins = toString(num); - m->binLabelsInFile.clear(); + m->sharedBinLabelsInFile.clear(); for (int i = 0; i < num; i++) { //if there is a bin label use it otherwise make one string binLabel = "Otu"; @@ -88,7 +88,7 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), ma for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; - m->binLabelsInFile.push_back(binLabel); + m->sharedBinLabelsInFile.push_back(binLabel); } } }else { @@ -99,7 +99,7 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), ma } //reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling - m->currentBinLabels = m->binLabelsInFile; + m->currentSharedBinLabels = m->sharedBinLabelsInFile; holdLabel = label; @@ -295,7 +295,7 @@ void SharedRAbundFloatVector::printHeaders(ostream& output){ for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; - if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; } output << binLabel << '\t'; } @@ -310,7 +310,7 @@ void SharedRAbundFloatVector::printHeaders(ostream& output){ for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; - if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; } output << binLabel << '\t'; } @@ -318,7 +318,7 @@ void SharedRAbundFloatVector::printHeaders(ostream& output){ output << endl; } - m->printedHeaders = true; + m->printedSharedHeaders = true; } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "printHeaders"); @@ -550,7 +550,7 @@ int SharedRAbundFloatVector::eliminateZeroOTUS(vector& for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; - if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; } newBinLabels.push_back(binLabel); } @@ -559,7 +559,7 @@ int SharedRAbundFloatVector::eliminateZeroOTUS(vector& for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; - m->currentBinLabels = newBinLabels; + m->currentSharedBinLabels = newBinLabels; return 0; diff --git a/sharedrabundvector.cpp b/sharedrabundvector.cpp index 9b2bb67..0f2e48e 100644 --- a/sharedrabundvector.cpp +++ b/sharedrabundvector.cpp @@ -86,13 +86,13 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), //parse labels to save istringstream iStringStream(label); - m->binLabelsInFile.clear(); + m->sharedBinLabelsInFile.clear(); while(!iStringStream.eof()){ if (m->control_pressed) { break; } string temp; iStringStream >> temp; m->gobble(iStringStream); - m->binLabelsInFile.push_back(temp); + m->sharedBinLabelsInFile.push_back(temp); } f >> label >> groupN >> num; @@ -102,7 +102,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), //make binlabels because we don't have any string snumBins = toString(num); - m->binLabelsInFile.clear(); + m->sharedBinLabelsInFile.clear(); for (int i = 0; i < num; i++) { //if there is a bin label use it otherwise make one string binLabel = "Otu"; @@ -112,7 +112,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; - m->binLabelsInFile.push_back(binLabel); + m->sharedBinLabelsInFile.push_back(binLabel); } } }else { @@ -125,7 +125,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), } //reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling - m->currentBinLabels = m->binLabelsInFile; + m->currentSharedBinLabels = m->sharedBinLabelsInFile; holdLabel = label; @@ -385,7 +385,7 @@ void SharedRAbundVector::printHeaders(ostream& output){ for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; - if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; } output << binLabel << '\t'; } @@ -400,14 +400,14 @@ void SharedRAbundVector::printHeaders(ostream& output){ for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; - if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; } output << binLabel << '\t'; } output << endl; } - m->printedHeaders = true; + m->printedSharedHeaders = true; } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "printHeaders"); @@ -533,7 +533,7 @@ int SharedRAbundVector::eliminateZeroOTUS(vector& thislooku for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; - if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; } newBinLabels.push_back(binLabel); } @@ -542,7 +542,7 @@ int SharedRAbundVector::eliminateZeroOTUS(vector& thislooku for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } thislookup = newLookup; - m->currentBinLabels = newBinLabels; + m->currentSharedBinLabels = newBinLabels; return 0; diff --git a/sharedsobscollectsummary.cpp b/sharedsobscollectsummary.cpp index b53b09a..204b61c 100644 --- a/sharedsobscollectsummary.cpp +++ b/sharedsobscollectsummary.cpp @@ -57,7 +57,7 @@ EstOutput SharedSobsCS::getValues(vector shared, vectorcurrentBinLabels[i]); } + if (sharedByAll == true) { observed++; labels.push_back(m->currentSharedBinLabels[i]); } } data[0] = observed; diff --git a/sparcccommand.cpp b/sparcccommand.cpp index a7f5d78..4c9049a 100644 --- a/sparcccommand.cpp +++ b/sparcccommand.cpp @@ -317,7 +317,7 @@ int SparccCommand::process(vector& lookup){ cout.setf(ios::showpoint); vector > sharedVector; - vector otuNames = m->currentBinLabels; + vector otuNames = m->currentSharedBinLabels; //fill sharedVector to pass to CalcSparcc for (int i = 0; i < lookup.size(); i++) { diff --git a/subsample.cpp b/subsample.cpp index 392f97b..2eb1d49 100644 --- a/subsample.cpp +++ b/subsample.cpp @@ -108,7 +108,7 @@ vector SubSample::getSample(vector& thislookup, int try { //save mothurOut's binLabels to restore for next label - vector saveBinLabels = m->currentBinLabels; + vector saveBinLabels = m->currentSharedBinLabels; int numBins = thislookup[0]->getNumBins(); for (int i = 0; i < thislookup.size(); i++) { @@ -136,7 +136,7 @@ vector SubSample::getSample(vector& thislookup, int for (int j = 0; j < size; j++) { - if (m->control_pressed) { return m->currentBinLabels; } + if (m->control_pressed) { return m->currentSharedBinLabels; } int bin = order.get(j); @@ -149,11 +149,11 @@ vector SubSample::getSample(vector& thislookup, int //subsampling may have created some otus with no sequences in them eliminateZeroOTUS(thislookup); - if (m->control_pressed) { return m->currentBinLabels; } + if (m->control_pressed) { return m->currentSharedBinLabels; } //save mothurOut's binLabels to restore for next label - vector subsampleBinLabels = m->currentBinLabels; - m->currentBinLabels = saveBinLabels; + vector subsampleBinLabels = m->currentSharedBinLabels; + m->currentSharedBinLabels = saveBinLabels; return subsampleBinLabels; @@ -200,7 +200,7 @@ int SubSample::eliminateZeroOTUS(vector& thislookup) { for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; - if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; } + if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; } newBinLabels.push_back(binLabel); } @@ -210,7 +210,7 @@ int SubSample::eliminateZeroOTUS(vector& thislookup) { thislookup.clear(); thislookup = newLookup; - m->currentBinLabels = newBinLabels; + m->currentSharedBinLabels = newBinLabels; return 0; diff --git a/subsamplecommand.cpp b/subsamplecommand.cpp index 6c0d1bc..a89d191 100644 --- a/subsamplecommand.cpp +++ b/subsamplecommand.cpp @@ -947,7 +947,7 @@ int SubSampleCommand::processShared(vector& thislookup) { try { //save mothurOut's binLabels to restore for next label - vector saveBinLabels = m->currentBinLabels; + vector saveBinLabels = m->currentSharedBinLabels; string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); } @@ -965,7 +965,7 @@ int SubSampleCommand::processShared(vector& thislookup) { m->openOutputFile(outputFileName, out); outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); - m->currentBinLabels = subsampledLabels; + m->currentSharedBinLabels = subsampledLabels; thislookup[0]->printHeaders(out); @@ -977,7 +977,7 @@ int SubSampleCommand::processShared(vector& thislookup) { //save mothurOut's binLabels to restore for next label - m->currentBinLabels = saveBinLabels; + m->currentSharedBinLabels = saveBinLabels; return 0;