From 72ebb6fa35b45b149812d47c2b1cb8acaca64659 Mon Sep 17 00:00:00 2001 From: westcott Date: Thu, 29 Oct 2009 13:56:37 +0000 Subject: [PATCH] added hcluster command and fixed some bugs, namely one with smart distancing. --- Mothur.xcodeproj/project.pbxproj | 26 ++- binsequencecommand.cpp | 5 + bootstrapsharedcommand.cpp | 4 + clustercommand.cpp | 3 +- collectcommand.cpp | 4 + collectsharedcommand.cpp | 5 + commandfactory.cpp | 5 + getlistcountcommand.cpp | 5 + getoturepcommand.cpp | 5 + getrabundcommand.cpp | 5 + getsabundcommand.cpp | 5 + getsharedotucommand.cpp | 6 +- globaldata.cpp | 8 +- globaldata.hpp | 5 +- hcluster.cpp | 299 +++++++++++++++++++++++++ hcluster.h | 69 ++++++ hclustercommand.cpp | 370 +++++++++++++++++++++++++++++++ hclustercommand.h | 80 +++++++ heatmapcommand.cpp | 11 +- heatmapsimcommand.cpp | 4 + matrixoutputcommand.cpp | 6 +- nameassignment.cpp | 14 ++ nameassignment.hpp | 1 + rarefactcommand.cpp | 5 + rarefactsharedcommand.cpp | 5 + readcluster.cpp | 264 ++++++++++++++++++++++ readcluster.h | 43 ++++ readcolumn.cpp | 284 ++++++++++++------------ readcolumn.h | 1 + readdistcommand.cpp | 7 +- screenseqscommand.cpp | 34 +++ sharedcommand.cpp | 10 +- slayer.cpp | 2 +- sparsematrix.cpp | 2 +- summarycommand.cpp | 5 + summarysharedcommand.cpp | 7 +- treegroupscommand.cpp | 5 + venncommand.cpp | 10 + 38 files changed, 1467 insertions(+), 162 deletions(-) create mode 100644 hcluster.cpp create mode 100644 hcluster.h create mode 100644 hclustercommand.cpp create mode 100644 hclustercommand.h create mode 100644 readcluster.cpp create mode 100644 readcluster.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index fa7c672..252c02e 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -164,7 +164,10 @@ A70B53AC0F4CD7AD0064797E /* getlinecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */; }; A70DECD91063D8B40057C03C /* secondarystructurecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */; }; A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */; }; + A729ACD010848E6100139801 /* hclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A729ACCF10848E6100139801 /* hclustercommand.cpp */; }; + A729ACE410849BBE00139801 /* hcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A729ACE310849BBE00139801 /* hcluster.cpp */; }; A73329CF1083B3B3003B10C5 /* getlistcountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */; }; + A751ACEA1098B283003D0911 /* readcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A751ACE91098B283003D0911 /* readcluster.cpp */; }; A75B887E104C16860083C454 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B887B104C16860083C454 /* ccode.cpp */; }; A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */; }; A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0450D106CEEC90046FC83 /* slayer.cpp */; }; @@ -525,8 +528,14 @@ A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = secondarystructurecommand.cpp; sourceTree = SOURCE_ROOT; }; A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckrdp.h; sourceTree = SOURCE_ROOT; }; A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeracheckrdp.cpp; sourceTree = SOURCE_ROOT; }; - A73329CD1083B3B3003B10C5 /* getlistcountcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlistcountcommand.h; sourceTree = ""; }; - A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getlistcountcommand.cpp; sourceTree = ""; }; + A729ACCE10848E6100139801 /* hclustercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hclustercommand.h; sourceTree = SOURCE_ROOT; }; + A729ACCF10848E6100139801 /* hclustercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hclustercommand.cpp; sourceTree = SOURCE_ROOT; }; + A729ACE210849BBE00139801 /* hcluster.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hcluster.h; sourceTree = SOURCE_ROOT; }; + A729ACE310849BBE00139801 /* hcluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hcluster.cpp; sourceTree = SOURCE_ROOT; }; + A73329CD1083B3B3003B10C5 /* getlistcountcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlistcountcommand.h; sourceTree = SOURCE_ROOT; }; + A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getlistcountcommand.cpp; sourceTree = SOURCE_ROOT; }; + A751ACE81098B283003D0911 /* readcluster.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readcluster.h; sourceTree = SOURCE_ROOT; }; + A751ACE91098B283003D0911 /* readcluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readcluster.cpp; sourceTree = SOURCE_ROOT; }; A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; }; A75B887C104C16860083C454 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ccode.h; sourceTree = SOURCE_ROOT; }; A7B04491106CC3E60046FC83 /* chimeraslayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayer.h; sourceTree = SOURCE_ROOT; }; @@ -585,6 +594,8 @@ 374F2FE81006346600E97C66 /* chimera */, 37D927C20F21331F001D4494 /* cluster.hpp */, 37D927C10F21331F001D4494 /* cluster.cpp */, + A729ACE210849BBE00139801 /* hcluster.h */, + A729ACE310849BBE00139801 /* hcluster.cpp */, 37D928A90F2133E5001D4494 /* commands */, 37D927C60F21331F001D4494 /* collect.h */, 37D927C50F21331F001D4494 /* collect.cpp */, @@ -618,8 +629,6 @@ 7E412FE90F8D3E2C00381DD0 /* libshuff.cpp */, 37E3ED800F4D9D0D00B5DF02 /* mothur.h */, 37D927EF0F21331F001D4494 /* mothur.cpp */, - 37D927F10F21331F001D4494 /* nameassignment.hpp */, - 37D927F00F21331F001D4494 /* nameassignment.cpp */, 373C691E0FC1C98600137ACD /* nast.hpp */, 373C691D0FC1C98600137ACD /* nast.cpp */, 373C692A0FC1C9EB00137ACD /* nastreport.hpp */, @@ -690,6 +699,8 @@ 3796441D0FB9B9650081FDB6 /* read */ = { isa = PBXGroup; children = ( + A751ACE81098B283003D0911 /* readcluster.h */, + A751ACE91098B283003D0911 /* readcluster.cpp */, 375AA1340F9E433D008EF9B8 /* readcolumn.h */, 375AA1330F9E433D008EF9B8 /* readcolumn.cpp */, 37D928130F21331F001D4494 /* readmatrix.hpp */, @@ -860,6 +871,8 @@ 37B73CA51004D89A008C4B41 /* getseqscommand.cpp */, A7E4A781106913F900688F62 /* getsharedotucommand.h */, A7E4A782106913F900688F62 /* getsharedotucommand.cpp */, + A729ACCE10848E6100139801 /* hclustercommand.h */, + A729ACCF10848E6100139801 /* hclustercommand.cpp */, 375873F10F7D64800040F377 /* heatmapcommand.h */, 375873F00F7D64800040F377 /* heatmapcommand.cpp */, 378598640FDD497000EF9D03 /* heatmapsimcommand.h */, @@ -948,6 +961,8 @@ 37D927EB0F21331F001D4494 /* kmerdb.cpp */, 37D927EE0F21331F001D4494 /* listvector.hpp */, 37D927ED0F21331F001D4494 /* listvector.cpp */, + 37D927F10F21331F001D4494 /* nameassignment.hpp */, + 37D927F00F21331F001D4494 /* nameassignment.cpp */, 37D927F80F21331F001D4494 /* ordervector.hpp */, 37D927F70F21331F001D4494 /* ordervector.cpp */, 37D928000F21331F001D4494 /* rabundvector.hpp */, @@ -1215,6 +1230,9 @@ A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */, A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */, A73329CF1083B3B3003B10C5 /* getlistcountcommand.cpp in Sources */, + A729ACD010848E6100139801 /* hclustercommand.cpp in Sources */, + A729ACE410849BBE00139801 /* hcluster.cpp in Sources */, + A751ACEA1098B283003D0911 /* readcluster.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/binsequencecommand.cpp b/binsequencecommand.cpp index 08ae6b6..ad90ff4 100644 --- a/binsequencecommand.cpp +++ b/binsequencecommand.cpp @@ -166,6 +166,8 @@ int BinSeqCommand::execute(){ } if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + delete list; list = input->getListVector(lastLabel); @@ -174,6 +176,9 @@ int BinSeqCommand::execute(){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); + + //restore real lastlabel to save below + list->setLabel(saveLabel); } lastLabel = list->getLabel(); diff --git a/bootstrapsharedcommand.cpp b/bootstrapsharedcommand.cpp index 0f758c1..1b682c7 100644 --- a/bootstrapsharedcommand.cpp +++ b/bootstrapsharedcommand.cpp @@ -224,6 +224,7 @@ int BootSharedCommand::execute(){ //you have a label the user want that is smaller than this label and the last label has not already been processed if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = order->getLabel(); delete order; order = input->getSharedOrderVector(lastLabel); @@ -232,6 +233,9 @@ int BootSharedCommand::execute(){ processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel()); + + //restore real lastlabel to save below + order->setLabel(saveLabel); } diff --git a/clustercommand.cpp b/clustercommand.cpp index d3b9bbf..8208b7e 100644 --- a/clustercommand.cpp +++ b/clustercommand.cpp @@ -71,7 +71,8 @@ ClusterCommand::ClusterCommand(string option){ if (abort == false) { - //get matrix, list and rabund for execute + + //get matrix, list and rabund for execute if(globaldata->gSparseMatrix != NULL) { matrix = globaldata->gSparseMatrix; } if(globaldata->gListVector != NULL){ diff --git a/collectcommand.cpp b/collectcommand.cpp index 7d3daef..ea2d558 100644 --- a/collectcommand.cpp +++ b/collectcommand.cpp @@ -221,6 +221,7 @@ int CollectCommand::execute(){ } //you have a label the user want that is smaller than this label and the last label has not already been processed if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = order->getLabel(); delete order; order = (input->getOrderVector(lastLabel)); @@ -232,6 +233,9 @@ int CollectCommand::execute(){ mothurOut(order->getLabel()); mothurOutEndLine(); processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel()); + + //restore real lastlabel to save below + order->setLabel(saveLabel); } lastLabel = order->getLabel(); diff --git a/collectsharedcommand.cpp b/collectsharedcommand.cpp index 18faa2d..844b138 100644 --- a/collectsharedcommand.cpp +++ b/collectsharedcommand.cpp @@ -244,6 +244,8 @@ int CollectSharedCommand::execute(){ //you have a label the user want that is smaller than this label and the last label has not already been processed if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = order->getLabel(); + delete order; order = input->getSharedOrderVector(lastLabel); @@ -255,6 +257,9 @@ int CollectSharedCommand::execute(){ mothurOut(order->getLabel()); mothurOutEndLine(); processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel()); + + //restore real lastlabel to save below + order->setLabel(saveLabel); } diff --git a/commandfactory.cpp b/commandfactory.cpp index 9adf679..5f07abb 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -57,6 +57,7 @@ #include "secondarystructurecommand.h" #include "getsharedotucommand.h" #include "getlistcountcommand.h" +#include "hclustercommand.h" /***********************************************************/ @@ -113,6 +114,8 @@ CommandFactory::CommandFactory(){ commands["get.sharedotu"] = "get.sharedotu"; commands["get.listcount"] = "get.listcount"; commands["quit"] = "quit"; + + commands["hcluster"] = "hcluster"; } /***********************************************************/ @@ -177,6 +180,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "align.check") { command = new AlignCheckCommand(optionString); } else if(commandName == "get.sharedotu") { command = new GetSharedOTUCommand(optionString); } else if(commandName == "get.listcount") { command = new GetListCountCommand(optionString); } + + else if(commandName == "hcluster") { command = new HClusterCommand(optionString); } else { command = new NoCommand(optionString); } return command; diff --git a/getlistcountcommand.cpp b/getlistcountcommand.cpp index f14e17a..94437fd 100644 --- a/getlistcountcommand.cpp +++ b/getlistcountcommand.cpp @@ -120,6 +120,8 @@ int GetListCountCommand::execute(){ } if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + delete list; list = input->getListVector(lastLabel); @@ -127,6 +129,9 @@ int GetListCountCommand::execute(){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); + + //restore real lastlabel to save below + list->setLabel(saveLabel); } lastLabel = list->getLabel(); diff --git a/getoturepcommand.cpp b/getoturepcommand.cpp index 0748619..e9bed00 100644 --- a/getoturepcommand.cpp +++ b/getoturepcommand.cpp @@ -199,6 +199,8 @@ int GetOTURepCommand::execute(){ } if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + delete list; list = input->getListVector(lastLabel); mothurOut(list->getLabel() + "\t" + toString(list->size())); mothurOutEndLine(); @@ -207,6 +209,9 @@ int GetOTURepCommand::execute(){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); + + //restore real lastlabel to save below + list->setLabel(saveLabel); } lastLabel = list->getLabel(); diff --git a/getrabundcommand.cpp b/getrabundcommand.cpp index 04b1b0b..2ed5b7b 100644 --- a/getrabundcommand.cpp +++ b/getrabundcommand.cpp @@ -133,6 +133,8 @@ int GetRAbundCommand::execute(){ } if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + delete list; list = input->getListVector(lastLabel); @@ -147,6 +149,9 @@ int GetRAbundCommand::execute(){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); + + //restore real lastlabel to save below + list->setLabel(saveLabel); } lastLabel = list->getLabel(); diff --git a/getsabundcommand.cpp b/getsabundcommand.cpp index 80a4429..3dd58c1 100644 --- a/getsabundcommand.cpp +++ b/getsabundcommand.cpp @@ -123,6 +123,8 @@ int GetSAbundCommand::execute(){ } if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = order->getLabel(); + delete order; order = (input->getOrderVector(lastLabel)); @@ -134,6 +136,9 @@ int GetSAbundCommand::execute(){ processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel()); + + //restore real lastlabel to save below + order->setLabel(saveLabel); } diff --git a/getsharedotucommand.cpp b/getsharedotucommand.cpp index 3def8bc..56f030d 100644 --- a/getsharedotucommand.cpp +++ b/getsharedotucommand.cpp @@ -92,7 +92,7 @@ void GetSharedOTUCommand::help(){ mothurOut("With this option you can use the names file as an input in get.seqs and remove.seqs commands. To do this enter output=accnos. \n"); mothurOut("The get.sharedotu command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n"); mothurOut("The get.sharedotu command should be in the following format: get.sabund(label=yourLabels, groups=yourGroups, fasta=yourFastafile, output=yourOutput).\n"); - mothurOut("Example get.sharedotu(label=unique-0.01, group=forest-pasture, fasta=amazon.fasta, output=accnos).\n"); + mothurOut("Example get.sharedotu(list=amazon.fn.list, label=unique-0.01, group=forest-pasture, fasta=amazon.fasta, output=accnos).\n"); mothurOut("The default value for label is all labels in your inputfile. The default for groups is all groups in your file.\n"); mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n"); } @@ -161,12 +161,16 @@ int GetSharedOTUCommand::execute(){ } if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); mothurOut(lastlist->getLabel()); process(lastlist); processedLabels.insert(lastlist->getLabel()); userLabels.erase(lastlist->getLabel()); + + //restore real lastlabel to save below + list->setLabel(saveLabel); } lastLabel = list->getLabel(); diff --git a/globaldata.cpp b/globaldata.cpp index 690a8c3..c287169 100644 --- a/globaldata.cpp +++ b/globaldata.cpp @@ -27,12 +27,10 @@ string GlobalData::getNameFile() { return namefile; } string GlobalData::getGroupFile() { return groupfile; } string GlobalData::getOrderFile() { return orderfile; } string GlobalData::getTreeFile() { return treefile; } -string GlobalData::getSharedFile() { return sharedfile; } -//string GlobalData::getFastaFile() { return fastafile; } +string GlobalData::getSharedFile() { return sharedfile; } string GlobalData::getFormat() { return format; } void GlobalData::setListFile(string file) { listfile = file; inputFileName = file; } -//void GlobalData::setFastaFile(string file) { fastafile = file; inputFileName = file; } void GlobalData::setTreeFile(string file) { treefile = file; inputFileName = file; } void GlobalData::setRabundFile(string file) { rabundfile = file; inputFileName = file; } void GlobalData::setSabundFile(string file) { sabundfile = file; inputFileName = file; } @@ -63,6 +61,7 @@ GlobalData::GlobalData() { gMatrix = NULL; gTreemap = NULL; gSequenceDB = NULL; + nameMap = NULL; } /*******************************************************/ @@ -115,6 +114,8 @@ void GlobalData::newRead() { if (gTreemap != NULL) { delete gTreemap; gTreemap = NULL; } if (gSequenceDB != NULL) { delete gSequenceDB; gSequenceDB = NULL;} + + if (nameMap != NULL) { delete nameMap; nameMap = NULL; } gTree.clear(); @@ -147,6 +148,7 @@ GlobalData::~GlobalData() { if (gMatrix != NULL) { delete gMatrix; gMatrix = NULL;} if (gTreemap != NULL) { delete gTreemap; gTreemap = NULL; } if (gSequenceDB != NULL) { delete gSequenceDB; gSequenceDB = NULL;} + if (nameMap != NULL) { delete nameMap; nameMap = NULL; } } catch(exception& e) { errorOut(e, "GlobalData", "~GlobalData"); diff --git a/globaldata.hpp b/globaldata.hpp index e125e6f..0238a0a 100644 --- a/globaldata.hpp +++ b/globaldata.hpp @@ -10,6 +10,7 @@ #include "tree.h" #include "sparsematrix.hpp" #include "sequencedb.h" +#include "nameassignment.hpp" class ListVector; @@ -73,6 +74,8 @@ public: void setOrderFile(string file); void setFormat(string); //do we need this? + NameAssignment* nameMap; + void clear(); void clearLabels(); void clearAbund(); @@ -81,7 +84,7 @@ public: private: - string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, treefile, sharedfile, format; + string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, treefile, sharedfile, format, distfile; static GlobalData* _uniqueInstance; GlobalData( const GlobalData& ); // Disable copy constructor diff --git a/hcluster.cpp b/hcluster.cpp new file mode 100644 index 0000000..df36260 --- /dev/null +++ b/hcluster.cpp @@ -0,0 +1,299 @@ +/* + * hcluster.cpp + * Mothur + * + * Created by westcott on 10/13/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + +#include "hcluster.h" +#include "rabundvector.hpp" +#include "listvector.hpp" +#include "sparsematrix.hpp" + +/***********************************************************************/ + +HCluster::HCluster(RAbundVector* rav, ListVector* lv) : rabund(rav), list(lv){ + try { + + numSeqs = list->getNumSeqs(); + + //initialize cluster array + for (int i = 0; i < numSeqs; i++) { + clusterNode temp(1, -1, i); + clusterArray.push_back(temp); + } + + } + catch(exception& e) { + errorOut(e, "HCluster", "HCluster"); + exit(1); + } +} +/***********************************************************************/ + +void HCluster::clusterBins(){ + try { + //cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild); + + rabund->set(clusterArray[smallCol].smallChild, rabund->get(clusterArray[smallRow].smallChild)+rabund->get(clusterArray[smallCol].smallChild)); + rabund->set(clusterArray[smallRow].smallChild, 0); + rabund->setLabel(toString(smallDist)); + + //cout << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild) << endl; + } + catch(exception& e) { + errorOut(e, "HCluster", "clusterBins"); + exit(1); + } + + +} + +/***********************************************************************/ + +void HCluster::clusterNames(){ + try { + ///cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(clusterArray[smallRow].smallChild) << '\t' << list->get(clusterArray[smallCol].smallChild); + + list->set(clusterArray[smallCol].smallChild, list->get(clusterArray[smallRow].smallChild)+','+list->get(clusterArray[smallCol].smallChild)); + list->set(clusterArray[smallRow].smallChild, ""); + list->setLabel(toString(smallDist)); + + //cout << '\t' << list->get(clusterArray[smallRow].smallChild) << '\t' << list->get(clusterArray[smallCol].smallChild) << endl; + + } + catch(exception& e) { + errorOut(e, "HCluster", "clusterNames"); + exit(1); + } + +} +/***********************************************************************/ +int HCluster::getUpmostParent(int node){ + try { + + while (clusterArray[node].parent != -1) { + node = clusterArray[node].parent; + } + + return node; + } + catch(exception& e) { + errorOut(e, "HCluster", "getUpmostParent"); + exit(1); + } +} +/***********************************************************************/ +void HCluster::printInfo(){ + try { + + cout << "link table" << endl; + for (it = activeLinks.begin(); it!= activeLinks.end(); it++) { + cout << it->first << " = " << it->second << endl; + } + cout << endl; + for (int i = 0; i < linkTable.size(); i++) { + for (it = linkTable[i].begin(); it != linkTable[i].end(); it++) { + cout << it->first << '\t' << it->second << '\t' << '\t'; + } + cout << endl; + } + cout << endl << "clusterArray" << endl; + + for (int i = 0; i < clusterArray.size(); i++) { + cout << i << '\t' << clusterArray[i].numSeq << '\t' << clusterArray[i].parent << '\t' << clusterArray[i].smallChild << endl; + } + cout << endl; + + + } + catch(exception& e) { + errorOut(e, "HCluster", "getUpmostParent"); + exit(1); + } +} +/***********************************************************************/ +int HCluster::makeActive() { + try { + + int linkValue = 1; +//cout << "active - here" << endl; + it = activeLinks.find(smallRow); + it2 = activeLinks.find(smallCol); + + if ((it == activeLinks.end()) && (it2 == activeLinks.end())) { //both are not active so add them + int size = linkTable.size(); + map temp; map temp2; + + //add link to eachother + temp[smallRow] = 1; // 1 2 + temp2[smallCol] = 1; // 1 0 1 + // 2 1 0 + linkTable.push_back(temp); + linkTable.push_back(temp2); + + //add to activeLinks + activeLinks[smallRow] = size; + activeLinks[smallCol] = size+1; +//cout << "active - here1" << endl; + }else if ((it != activeLinks.end()) && (it2 == activeLinks.end())) { //smallRow is active, smallCol is not + int size = linkTable.size(); + int alreadyActiveRow = it->second; + map temp; + + //add link to eachother + temp[smallRow] = 1; // 6 2 3 5 + linkTable.push_back(temp); // 6 0 1 2 0 + linkTable[alreadyActiveRow][smallCol] = 1; // 2 1 0 1 1 + // 3 2 1 0 0 + // 5 0 1 0 0 + //add to activeLinks + activeLinks[smallCol] = size; +//cout << "active - here2" << endl; + }else if ((it == activeLinks.end()) && (it2 != activeLinks.end())) { //smallCol is active, smallRow is not + int size = linkTable.size(); + int alreadyActiveCol = it2->second; + map temp; + + //add link to eachother + temp[smallCol] = 1; // 6 2 3 5 + linkTable.push_back(temp); // 6 0 1 2 0 + linkTable[alreadyActiveCol][smallRow] = 1; // 2 1 0 1 1 + // 3 2 1 0 0 + // 5 0 1 0 0 + //add to activeLinks + activeLinks[smallRow] = size; +//cout << "active - here3" << endl; + }else { //both are active so add one + int row = it->second; + int col = it2->second; +//cout << row << '\t' << col << endl; + + linkTable[row][smallCol]++; + linkTable[col][smallRow]++; + linkValue = linkTable[row][smallCol]; +//cout << "active - here4" << endl; + } + + return linkValue; + } + catch(exception& e) { + errorOut(e, "HCluster", "makeActive"); + exit(1); + } +} +/***********************************************************************/ +void HCluster::updateArrayandLinkTable() { + try { + //if cluster was made update clusterArray and linkTable + int size = clusterArray.size(); + + //add new node + clusterNode temp(clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq, -1, clusterArray[smallCol].smallChild); + clusterArray.push_back(temp); + + //update child nodes + clusterArray[smallRow].parent = size; + clusterArray[smallCol].parent = size; + + //update linkTable by merging clustered rows and columns + int rowSpot = activeLinks[smallRow]; + int colSpot = activeLinks[smallCol]; + //cout << "here" << endl; + //fix old rows + for (int i = 0; i < linkTable.size(); i++) { + //check if they are in map + it = linkTable[i].find(smallRow); + it2 = linkTable[i].find(smallCol); + + if ((it!=linkTable[i].end()) && (it2!=linkTable[i].end())) { //they are both there + linkTable[i][size] = linkTable[i][smallRow]+linkTable[i][smallCol]; + linkTable[i].erase(smallCol); //delete col row + linkTable[i].erase(smallRow); //delete col row + }else if ((it==linkTable[i].end()) && (it2!=linkTable[i].end())) { //only col + linkTable[i][size] = linkTable[i][smallCol]; + linkTable[i].erase(smallCol); //delete col + }else if ((it!=linkTable[i].end()) && (it2==linkTable[i].end())) { //only row + linkTable[i][size] = linkTable[i][smallRow]; + linkTable[i].erase(smallRow); //delete col + } + } + //printInfo(); +//cout << "here2" << endl; + //merge their values + for (it = linkTable[rowSpot].begin(); it != linkTable[rowSpot].end(); it++) { + it2 = linkTable[colSpot].find(it->first); //does the col also have this + + if (it2 == linkTable[colSpot].end()) { //not there so add it + linkTable[colSpot][it->first] = it->second; + }else { //merge them + linkTable[colSpot][it->first] = it->second+it2->second; + } + } +//cout << "here3" << endl; + linkTable[colSpot].erase(size); + linkTable.erase(linkTable.begin()+rowSpot); //delete col + //printInfo(); + //update activerows + activeLinks.erase(smallRow); + activeLinks.erase(smallCol); + + if(rowSpot>colSpot) { activeLinks[size] = colSpot; } + else{ activeLinks[size] = colSpot-1; } + + //adjust everybody elses spot since you deleted - time vs. space + for (it = activeLinks.begin(); it != activeLinks.end(); it++) { + if (it->second > rowSpot) { activeLinks[it->first]--; } + } + + +//cout << "here4" << endl; + + } + catch(exception& e) { + errorOut(e, "HCluster", "updateArrayandLinkTable"); + exit(1); + } +} +/***********************************************************************/ +bool HCluster::update(int row, int col, float distance){ + try { + + smallRow = row; + smallCol = col; + smallDist = distance; + bool clustered = false; + + //find upmost parent of row and col + smallRow = getUpmostParent(smallRow); + smallCol = getUpmostParent(smallCol); + //cout << "smallRow = " << smallRow << " smallCol = " << smallCol << endl; + + //are they active in the link table + int linkValue = makeActive(); //after this point this nodes info is active in linkTable + //printInfo(); +//cout << "linkValue = " << linkValue << " times = " << (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq) << endl; + //can we cluster??? + if (linkValue == (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq)) { + updateArrayandLinkTable(); + clusterBins(); + clusterNames(); + clustered = true; + //printInfo(); + } + + + return clustered; + } + catch(exception& e) { + errorOut(e, "HCluster", "update"); + exit(1); + } +} + + +/***********************************************************************/ + + diff --git a/hcluster.h b/hcluster.h new file mode 100644 index 0000000..2688c19 --- /dev/null +++ b/hcluster.h @@ -0,0 +1,69 @@ +#ifndef HCLUSTER_H +#define HCLUSTER_H + +/* + * hcluster.h + * Mothur + * + * Created by westcott on 10/13/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + + +#include "mothur.h" + +class RAbundVector; +class ListVector; +/************************************************************/ +struct clusterNode { + int numSeq; + int parent; + int smallChild; //used to make linkTable work with list and rabund + clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {}; +}; + +/***********************************************************************/ +class HCluster { + +public: + HCluster(RAbundVector*, ListVector*); + bool update(int, int, float); + //string getTag(); + +protected: + void clusterBins(); + void clusterNames(); + int getUpmostParent(int); + int makeActive(); + void printInfo(); + void updateArrayandLinkTable(); + + RAbundVector* rabund; + ListVector* list; + + vector clusterArray; + vector< map > linkTable; // vector of maps - linkTable[1][6] = 2 would mean sequence in spot 1 has 2 links with sequence in 6 + map activeLinks; //maps sequence to index in linkTable + map::iterator it; + map::iterator it2; + + int numSeqs; + + int smallRow; + int smallCol; + float smallDist; + +}; + +/***********************************************************************/ + + + + + + + +#endif + + diff --git a/hclustercommand.cpp b/hclustercommand.cpp new file mode 100644 index 0000000..38ba4b6 --- /dev/null +++ b/hclustercommand.cpp @@ -0,0 +1,370 @@ +/* + * hclustercommand.cpp + * Mothur + * + * Created by westcott on 10/13/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + +#include "hclustercommand.h" + +//********************************************************************************************************************** +//This function checks to make sure the cluster command has no errors and then clusters based on the method chosen. +HClusterCommand::HClusterCommand(string option){ + try{ + globaldata = GlobalData::getInstance(); + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"cutoff","precision","method","showabund","timing","phylip","column","name","sorted"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + + //check to make sure all parameters are valid for command + for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { + abort = true; + } + } + + globaldata->newRead(); + + //check for required parameters + phylipfile = validParameter.validFile(parameters, "phylip", true); + if (phylipfile == "not open") { abort = true; } + else if (phylipfile == "not found") { phylipfile = ""; } + else { distfile = phylipfile; format = "phylip"; } + + columnfile = validParameter.validFile(parameters, "column", true); + if (columnfile == "not open") { abort = true; } + else if (columnfile == "not found") { columnfile = ""; } + else { distfile = columnfile; format = "column"; } + + namefile = validParameter.validFile(parameters, "name", true); + if (namefile == "not open") { abort = true; } + else if (namefile == "not found") { namefile = ""; } + + if ((phylipfile == "") && (columnfile == "")) { mothurOut("When executing a cluster command you must enter a phylip or a column."); mothurOutEndLine(); abort = true; } + else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When executing a cluster command you must enter ONLY ONE of the following: phylip or column."); mothurOutEndLine(); abort = true; } + + if (columnfile != "") { + if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; } + } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + //get user cutoff and precision or use defaults + string temp; + temp = validParameter.validFile(parameters, "precision", false); + if (temp == "not found") { temp = "100"; } + //saves precision legnth for formatting below + length = temp.length(); + convert(temp, precision); + + temp = validParameter.validFile(parameters, "cutoff", false); + if (temp == "not found") { temp = "10"; } + convert(temp, cutoff); + cutoff += (5 / (precision * 10.0)); + + method = validParameter.validFile(parameters, "method", false); + if (method == "not found") { method = "nearest"; } + + if ((method == "furthest") || (method == "nearest") || (method == "average")) { } + else { mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); mothurOutEndLine(); abort = true; } + + showabund = validParameter.validFile(parameters, "showabund", false); + if (showabund == "not found") { showabund = "T"; } + + sort = validParameter.validFile(parameters, "sorted", false); + if (sort == "not found") { sort = "F"; } + sorted = isTrue(sort); + + timing = validParameter.validFile(parameters, "timing", false); + if (timing == "not found") { timing = "F"; } + + + if (abort == false) { + + fileroot = getRootName(distfile); + + tag = "fn"; //until we figure out average and nearest methods + + openOutputFile(fileroot+ tag + ".sabund", sabundFile); + openOutputFile(fileroot+ tag + ".rabund", rabundFile); + openOutputFile(fileroot+ tag + ".list", listFile); + } + } + } + catch(exception& e) { + errorOut(e, "HClusterCommand", "HClusterCommand"); + exit(1); + } +} + +//********************************************************************************************************************** + +void HClusterCommand::help(){ + try { + mothurOut("The cluster command can only be executed after a successful read.dist command.\n"); + mothurOut("The cluster command parameter options are method, cuttoff, precision, showabund and timing. No parameters are required.\n"); + mothurOut("The cluster command should be in the following format: \n"); + mothurOut("cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n"); + mothurOut("The acceptable cluster methods are furthest, nearest and average. If no method is provided then furthest is assumed.\n\n"); + } + catch(exception& e) { + errorOut(e, "HClusterCommand", "help"); + exit(1); + } +} + +//********************************************************************************************************************** + +HClusterCommand::~HClusterCommand(){} + +//********************************************************************************************************************** + +int HClusterCommand::execute(){ + try { + + if (abort == true) { return 0; } + + if(namefile != ""){ + globaldata->nameMap = new NameAssignment(namefile); + globaldata->nameMap->readMap(); + }else{ + globaldata->nameMap = NULL; + } + + if (!sorted) { + read = new ReadCluster(distfile, cutoff); + read->setFormat(format); + read->read(globaldata->nameMap); + distfile = read->getOutputFile(); + + list = read->getListVector(); + delete read; + }else { + list = new ListVector(globaldata->nameMap->getListVector()); + } + + //list vector made by read contains all sequence names + if(list != NULL){ + rabund = new RAbundVector(list->getRAbundVector()); + }else{ + mothurOut("Error: no list vector!"); mothurOutEndLine(); return 0; + } + + + + float previousDist = 0.00000; + float rndPreviousDist = 0.00000; + oldRAbund = *rabund; + oldList = *list; + + print_start = true; + start = time(NULL); + +//cout << "here" << endl; + ifstream in; + openInputFile(distfile, in); + string firstName, secondName; + float distance; + + cluster = new HCluster(rabund, list); + bool clusteredSomething; + vector seqs; seqs.resize(1); // to start loop + exitedBreak = false; //lets you know if there is a distance stored in next + + while (seqs.size() != 0){ + + seqs = getSeqs(in); + random_shuffle(seqs.begin(), seqs.end()); + + if (seqs.size() == 0) { break; } //there are no more distances + + for (int i = 0; i < seqs.size(); i++) { //-1 means skip me + + if (print_start && isTrue(timing)) { + mothurOut("Clustering (" + tag + ") dist " + toString(distance) + "/" + + toString(roundDist(distance, precision)) + + "\t(precision: " + toString(precision) + ")"); + cout.flush(); + print_start = false; + } + + ///cout << "before cluster update" << endl; + if (seqs[i].seq1 != seqs[i].seq2) { + clusteredSomething = cluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist); + + float rndDist = roundDist(seqs[i].dist, precision); + //cout << "after cluster update clusterSomething = " << clusteredSomething << " rndDist = " << rndDist << " rndPreviousDist = " << rndPreviousDist << endl; + + + if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){ + printData("unique"); + } + else if((rndDist != rndPreviousDist)){ + printData(toString(rndPreviousDist, length-1)); + } + + previousDist = seqs[i].dist; + rndPreviousDist = rndDist; + oldRAbund = *rabund; + oldList = *list; + } + } + } + + in.close(); + + if (print_start && isTrue(timing)) { + //mothurOut("Clustering (" + tag + ") for distance " + toString(previousDist) + "/" + toString(rndPreviousDist) + //+ "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")"); + cout.flush(); + print_start = false; + } + + if(previousDist <= 0.0000){ + printData("unique"); + } + else if(rndPreviousDistgListVector; globaldata->gListVector = NULL; + + //saves .list file so you can do the collect, rarefaction and summary commands without doing a read.list + if (globaldata->getFormat() == "phylip") { globaldata->setPhylipFile(""); } + else if (globaldata->getFormat() == "column") { globaldata->setColumnFile(""); } + + globaldata->setListFile(fileroot+ tag + ".list"); + globaldata->setNameFile(""); + globaldata->setFormat("list"); + + sabundFile.close(); + rabundFile.close(); + listFile.close(); + if (isTrue(timing)) { + //mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster " + toString(ndist) + " distances"); mothurOutEndLine(); + } + return 0; + } + catch(exception& e) { + errorOut(e, "HClusterCommand", "execute"); + exit(1); + } +} + +//********************************************************************************************************************** + +void HClusterCommand::printData(string label){ + try { + if (isTrue(timing)) { + mothurOut("\tTime: " + toString(time(NULL) - start) + "\tsecs for " + toString(oldRAbund.getNumBins()) + + "\tclusters. Updates: " + toString(loops)); mothurOutEndLine(); + } + print_start = true; + loops = 0; + start = time(NULL); + + oldRAbund.setLabel(label); + if (isTrue(showabund)) { + oldRAbund.getSAbundVector().print(cout); + } + oldRAbund.print(rabundFile); + oldRAbund.getSAbundVector().print(sabundFile); + + oldList.setLabel(label); + oldList.print(listFile); + } + catch(exception& e) { + errorOut(e, "HClusterCommand", "printData"); + exit(1); + } + + +} +//********************************************************************************************************************** + +vector HClusterCommand::getSeqs(ifstream& filehandle){ + try { + string firstName, secondName; + float distance, prevDistance; + vector sameSeqs; + prevDistance = -1; + + //if you are not at the beginning of the file + if (exitedBreak) { + sameSeqs.push_back(next); + prevDistance = next.dist; + exitedBreak = false; + } + + //get entry + while (filehandle) { + + filehandle >> firstName >> secondName >> distance; +//cout << firstName << '\t' << secondName << '\t' << distance << endl; + gobble(filehandle); + + //save first one + if (prevDistance == -1) { prevDistance = distance; } + + map::iterator itA = globaldata->nameMap->find(firstName); + map::iterator itB = globaldata->nameMap->find(secondName); + + if(itA == globaldata->nameMap->end()){ + cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; + } + if(itB == globaldata->nameMap->end()){ + cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; + } + + //using cutoff + if (distance > cutoff) { break; } + + if (distance != -1) { //-1 means skip me + + //are the distances the same + if (distance == prevDistance) { //save in vector + seqDist temp; + temp.seq1 = itA->second; + temp.seq2 = itB->second; + temp.dist = distance; + sameSeqs.push_back(temp); + exitedBreak = false; + //what about precision?? + + }else{ + next.seq1 = itA->second; + next.seq2 = itB->second; + next.dist = distance; + exitedBreak = true; + break; + } + + } + } + + return sameSeqs; + } + catch(exception& e) { + errorOut(e, "HClusterCommand", "getSeqs"); + exit(1); + } + + +} + +//********************************************************************************************************************** + diff --git a/hclustercommand.h b/hclustercommand.h new file mode 100644 index 0000000..7351cef --- /dev/null +++ b/hclustercommand.h @@ -0,0 +1,80 @@ +#ifndef HCLUSTERCOMMAND_H +#define HCLUSTERCOMMAND_H + +/* + * hclustercommand.h + * Mothur + * + * Created by westcott on 10/13/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + +#include "command.hpp" +#include "globaldata.hpp" +#include "hcluster.h" +#include "rabundvector.hpp" +#include "sabundvector.hpp" +#include "listvector.hpp" +#include "readcluster.h" + +/******************************************************************/ +//This command is an implementation of the HCluster algorythmn described in +//ESPRIT: estimating species richness using large collections of 16S rRNA pyrosequences by +//Yijun Sun1,2,*, Yunpeng Cai2, Li Liu1, Fahong Yu1, Michael L. Farrell3, William McKendree3 +//and William Farmerie1 1 + +//Interdisciplinary Center for Biotechnology Research, 2Department of Electrical and Computer Engineering, +//University of Florida, Gainesville, FL 32610-3622 and 3Materials Technology Directorate, Air Force Technical +//Applications Center, 1030 S. Highway A1A, Patrick AFB, FL 32925-3002, USA +//Received January 28, 2009; Revised April 14, 2009; Accepted April 15, 2009 + +/******************************************************************/ + + +/************************************************************/ +struct seqDist { + int seq1; + int seq2; + float dist; +}; +/************************************************************/ +class HClusterCommand : public Command { + +public: + HClusterCommand(string); + ~HClusterCommand(); + int execute(); + void help(); + +private: + GlobalData* globaldata; + HCluster* cluster; + ListVector* list; + RAbundVector* rabund; + RAbundVector oldRAbund; + ListVector oldList; + ReadCluster* read; + + bool abort; + + string method, fileroot, tag, distfile, format, phylipfile, columnfile, namefile, sort; + double cutoff; + string showabund, timing; + int precision, length; + ofstream sabundFile, rabundFile, listFile; + //ifstream in; + seqDist next; + bool exitedBreak, sorted; + + bool print_start; + time_t start; + unsigned long loops; + + void printData(string label); + vector getSeqs(ifstream&); +}; + +/************************************************************/ + +#endif diff --git a/heatmapcommand.cpp b/heatmapcommand.cpp index 52395bb..1992ce5 100644 --- a/heatmapcommand.cpp +++ b/heatmapcommand.cpp @@ -163,6 +163,8 @@ int HeatMapCommand::execute(){ } if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input->getSharedRAbundVectors(lastLabel); mothurOut(lookup[0]->getLabel()); mothurOutEndLine(); @@ -171,6 +173,9 @@ int HeatMapCommand::execute(){ processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); } lastLabel = lookup[0]->getLabel(); @@ -223,7 +228,8 @@ int HeatMapCommand::execute(){ } if ((anyLabelsToProcess(rabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { - + string saveLabel = rabund->getLabel(); + delete rabund; rabund = input->getRAbundVector(lastLabel); mothurOut(rabund->getLabel()); mothurOutEndLine(); @@ -232,6 +238,9 @@ int HeatMapCommand::execute(){ processedLabels.insert(rabund->getLabel()); userLabels.erase(rabund->getLabel()); + + //restore real lastlabel to save below + rabund->setLabel(saveLabel); } diff --git a/heatmapsimcommand.cpp b/heatmapsimcommand.cpp index d88bec0..3803692 100644 --- a/heatmapsimcommand.cpp +++ b/heatmapsimcommand.cpp @@ -197,6 +197,7 @@ int HeatMapSimCommand::execute(){ } if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input->getSharedRAbundVectors(lastLabel); @@ -206,6 +207,9 @@ int HeatMapSimCommand::execute(){ processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); } //prevent memory leak diff --git a/matrixoutputcommand.cpp b/matrixoutputcommand.cpp index 6ccbf35..5069e0f 100644 --- a/matrixoutputcommand.cpp +++ b/matrixoutputcommand.cpp @@ -196,7 +196,8 @@ int MatrixOutputCommand::execute(){ } if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { - + string saveLabel = lookup[0]->getLabel(); + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input->getSharedRAbundVectors(lastLabel); @@ -205,6 +206,9 @@ int MatrixOutputCommand::execute(){ processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); } lastLabel = lookup[0]->getLabel(); diff --git a/nameassignment.cpp b/nameassignment.cpp index c31468c..9256086 100644 --- a/nameassignment.cpp +++ b/nameassignment.cpp @@ -39,6 +39,20 @@ void NameAssignment::readMap(){ exit(1); } } +//********************************************************************************************************************** +void NameAssignment::push_back(string name) { + try{ + + int num = (*this).size(); + (*this)[name] = num; + + list.push_back(name); + } + catch(exception& e) { + errorOut(e, "NameAssignment", "push_back"); + exit(1); + } +} //********************************************************************************************************************** diff --git a/nameassignment.hpp b/nameassignment.hpp index e36a763..a9bf06b 100644 --- a/nameassignment.hpp +++ b/nameassignment.hpp @@ -11,6 +11,7 @@ public: ListVector getListVector(); int get(string); void print(); + void push_back(string); private: ifstream fileHandle; ListVector list; diff --git a/rarefactcommand.cpp b/rarefactcommand.cpp index 76d6602..4417d5c 100644 --- a/rarefactcommand.cpp +++ b/rarefactcommand.cpp @@ -195,6 +195,8 @@ int RareFactCommand::execute(){ } if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = order->getLabel(); + delete order; order = (input->getOrderVector(lastLabel)); @@ -205,6 +207,9 @@ int RareFactCommand::execute(){ mothurOut(order->getLabel()); mothurOutEndLine(); processedLabels.insert(order->getLabel()); userLabels.erase(order->getLabel()); + + //restore real lastlabel to save below + order->setLabel(saveLabel); } lastLabel = order->getLabel(); diff --git a/rarefactsharedcommand.cpp b/rarefactsharedcommand.cpp index 022ae65..83fb35e 100644 --- a/rarefactsharedcommand.cpp +++ b/rarefactsharedcommand.cpp @@ -188,6 +188,8 @@ int RareFactSharedCommand::execute(){ } if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input->getSharedRAbundVectors(lastLabel); @@ -198,6 +200,9 @@ int RareFactSharedCommand::execute(){ processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); } diff --git a/readcluster.cpp b/readcluster.cpp new file mode 100644 index 0000000..50787e9 --- /dev/null +++ b/readcluster.cpp @@ -0,0 +1,264 @@ +/* + * readcluster.cpp + * Mothur + * + * Created by westcott on 10/28/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + +#include "readcluster.h" + +/***********************************************************************/ + +ReadCluster::ReadCluster(string distfile, float c){ + distFile = distfile; + cutoff = c; +} + +/***********************************************************************/ + +void ReadCluster::read(NameAssignment* nameMap){ + try { + + if (format == "phylip") { convertPhylip2Column(nameMap); } + else { list = new ListVector(nameMap->getListVector()); } + + createHClusterFile(); + + } + catch(exception& e) { + errorOut(e, "ReadCluster", "read"); + exit(1); + } +} +/***********************************************************************/ + +void ReadCluster::createHClusterFile(){ + try { + string outfile = getRootName(distFile) + "sorted.dist"; + + //if you can, use the unix sort since its been optimized for years + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + string command = "sort -n -k +3 " + distFile + " -o " + outfile; + system(command.c_str()); + #else //you are stuck with my best attempt... + //windows sort does not have a way to specify a column, only a character in the line + //since we cannot assume that the distance will always be at the the same character location on each line + //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back. + + //read in file line by file and put distance first + string tempDistFile = distFile + ".temp"; + ifstream input; + ofstream output; + openInputFile(distFile, input); + openOutputFile(tempDistFile, output); + + string firstName, secondName; + float dist; + while (input) { + input >> firstName >> secondName >> dist; + output << dist << '\t' << firstName << '\t' << secondName << endl; + gobble(input); + } + input.close(); + output.close(); + + + //sort using windows sort + string tempOutfile = outfile + ".temp"; + string command = "sort " + tempDistFile + " /O " + tempOutfile; + system(command.c_str()); + + //read in sorted file and put distance at end again + ifstream input2; + openInputFile(tempOutfile, input2); + openOutputFile(outfile, output); + + while (input2) { + input2 >> dist >> firstName >> secondName; + output << firstName << '\t' << secondName << '\t' << dist << endl; + gobble(input2); + } + input2.close(); + output.close(); + + //remove temp files + remove(tempDistFile.c_str()); + remove(tempOutfile.c_str()); + #endif + + OutPutFile = outfile; + } + catch(exception& e) { + errorOut(e, "ReadCluster", "createHClusterFile"); + exit(1); + } +} + + +/***********************************************************************/ + +void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ + try { + //convert phylip file to column file + map rowToName; + map::iterator it; + + ifstream in; + ofstream out; + string tempFile = distFile + ".column.temp"; + + openInputFile(distFile, in); + openOutputFile(tempFile, out); + + float distance; + int square, nseqs; + string name; + vector matrixNames; + + in >> nseqs >> name; + rowToName[0] = name; + matrixNames.push_back(name); + + if(nameMap == NULL){ + list = new ListVector(nseqs); + list->set(0, name); + } + else{ + list = new ListVector(nameMap->getListVector()); + if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); } + } + + char d; + while((d=in.get()) != EOF){ + + if(isalnum(d)){ + square = 1; + in.putback(d); + for(int i=0;i> distance; + } + break; + } + if(d == '\n'){ + square = 0; + break; + } + } + + if(square == 0){ + + for(int i=1;i> name; + rowToName[i] = name; + matrixNames.push_back(name); + + //there's A LOT of repeated code throughout this method... + if(nameMap == NULL){ + list->set(i, name); + + for(int j=0;j> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff){ + out << i << '\t' << j << '\t' << distance << endl; + } + } + + } + else{ + if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); } + + for(int j=0;j> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff){ + out << i << '\t' << j << '\t' << distance << endl; + } + } + } + } + } + else{ + + for(int i=1;i> name; + rowToName[i] = name; + matrixNames.push_back(name); + + if(nameMap == NULL){ + list->set(i, name); + for(int j=0;j> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff && j < i){ + out << i << '\t' << j << '\t' << distance << endl; + } + } + + } + else{ + if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); } + + for(int j=0;j> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff && j < i){ + out << i << '\t' << j << '\t' << distance << endl; + } + + } + } + } + } + + list->setLabel("0"); + in.close(); + out.close(); + + if(nameMap == NULL){ + for(int i=0;ipush_back(matrixNames[i]); + } + } + + ifstream in2; + ofstream out2; + + string outputFile = getRootName(distFile) + "column.dist"; + openInputFile(tempFile, in2); + openOutputFile(outputFile, out2); + + int first, second; + float dist; + + while (in2) { + in2 >> first >> second >> dist; + out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl; + gobble(in2); + } + in2.close(); + out2.close(); + + remove(tempFile.c_str()); + distFile = outputFile; + } + catch(exception& e) { + errorOut(e, "ReadCluster", "convertPhylip2Column"); + exit(1); + } +} +/***********************************************************************/ + +ReadCluster::~ReadCluster(){} +/***********************************************************************/ + diff --git a/readcluster.h b/readcluster.h new file mode 100644 index 0000000..c81b1dd --- /dev/null +++ b/readcluster.h @@ -0,0 +1,43 @@ +#ifndef READCLUSTER_H +#define READCLUSTER_H +/* + * readcluster.h + * Mothur + * + * Created by westcott on 10/28/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + + +#include "mothur.h" +#include "nameassignment.hpp" +#include "listvector.hpp" + + +/******************************************************/ + +class ReadCluster { + +public: + ReadCluster(string, float); + ~ReadCluster(); + void read(NameAssignment*); + string getOutputFile() { return OutPutFile; } + void setFormat(string f) { format = f; } + ListVector* getListVector() { return list; } + +private: + string distFile; + string OutPutFile, format; + ListVector* list; + float cutoff; + + void createHClusterFile(); + void convertPhylip2Column(NameAssignment*); +}; + +/******************************************************/ + +#endif + diff --git a/readcolumn.cpp b/readcolumn.cpp index 29c967e..fccc2cd 100644 --- a/readcolumn.cpp +++ b/readcolumn.cpp @@ -1,142 +1,142 @@ -/* - * readcolumn.cpp - * Mothur - * - * Created by Sarah Westcott on 4/21/09. - * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. - * - */ - -#include "readcolumn.h" -#include "progress.hpp" - -/***********************************************************************/ - -ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){ - - successOpen = openInputFile(distFile, fileHandle); - -} - -/***********************************************************************/ - -void ReadColumnMatrix::read(NameAssignment* nameMap){ - try { - - string firstName, secondName; - float distance; - int nseqs = nameMap->size(); - - list = new ListVector(nameMap->getListVector()); - - Progress* reading = new Progress("Reading matrix: ", nseqs * nseqs); - - int lt = 1; - int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose - int refCol = 0; //shows up later - Cell(refCol,refRow). If it does, then its a square matrix - - //need to see if this is a square or a triangular matrix... - - while(fileHandle && lt == 1){ //let's assume it's a triangular matrix... - - fileHandle >> firstName >> secondName >> distance; // get the row and column names and distance - - map::iterator itA = nameMap->find(firstName); - map::iterator itB = nameMap->find(secondName); - - if(itA == nameMap->end()){ - cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; - } - if(itB == nameMap->end()){ - cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; - } - - if (distance == -1) { distance = 1000000; } - - if(distance < cutoff && itA != itB){ - if(itA->second > itB->second){ - PCell value(itA->second, itB->second, distance); - - if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol... - refRow = itA->second; - refCol = itB->second; - D->addCell(value); - } - else if(refRow == itA->second && refCol == itB->second){ - lt = 0; - } - else{ - D->addCell(value); - } - } - else if(itA->second < itB->second){ - PCell value(itB->second, itA->second, distance); - - if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol... - refRow = itA->second; - refCol = itB->second; - D->addCell(value); - } - else if(refRow == itB->second && refCol == itA->second){ - lt = 0; - } - else{ - D->addCell(value); - } - } - reading->update(itA->second * nseqs); - } - gobble(fileHandle); - } - - if(lt == 0){ // oops, it was square - fileHandle.close(); //let's start over - D->clear(); //let's start over - - openInputFile(distFile, fileHandle); //let's start over - - while(fileHandle){ - fileHandle >> firstName >> secondName >> distance; - - map::iterator itA = nameMap->find(firstName); - map::iterator itB = nameMap->find(secondName); - - if(itA == nameMap->end()){ - cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; - } - if(itB == nameMap->end()){ - cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; - } - - if (distance == -1) { distance = 1000000; } - - if(distance < cutoff && itA->second > itB->second){ - PCell value(itA->second, itB->second, distance); - D->addCell(value); - reading->update(itA->second * nseqs); - } - - gobble(fileHandle); - } - } - - reading->finish(); - fileHandle.close(); - - list->setLabel("0"); - - } - catch(exception& e) { - errorOut(e, "ReadColumnMatrix", "read"); - exit(1); - } -} - -/***********************************************************************/ - -ReadColumnMatrix::~ReadColumnMatrix(){ - //delete D; - //delete list; -} - - +/* + * readcolumn.cpp + * Mothur + * + * Created by Sarah Westcott on 4/21/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. + * + */ + +#include "readcolumn.h" +#include "progress.hpp" + +/***********************************************************************/ + +ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){ + + successOpen = openInputFile(distFile, fileHandle); + +} + +/***********************************************************************/ + +void ReadColumnMatrix::read(NameAssignment* nameMap){ + try { + + string firstName, secondName; + float distance; + int nseqs = nameMap->size(); + + list = new ListVector(nameMap->getListVector()); + + Progress* reading = new Progress("Reading matrix: ", nseqs * nseqs); + + int lt = 1; + int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose + int refCol = 0; //shows up later - Cell(refCol,refRow). If it does, then its a square matrix + + //need to see if this is a square or a triangular matrix... + + while(fileHandle && lt == 1){ //let's assume it's a triangular matrix... + + fileHandle >> firstName >> secondName >> distance; // get the row and column names and distance + + map::iterator itA = nameMap->find(firstName); + map::iterator itB = nameMap->find(secondName); + + if(itA == nameMap->end()){ + cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; + } + if(itB == nameMap->end()){ + cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; + } + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff && itA != itB){ + if(itA->second > itB->second){ + PCell value(itA->second, itB->second, distance); + + if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol... + refRow = itA->second; + refCol = itB->second; + D->addCell(value); + } + else if(refRow == itA->second && refCol == itB->second){ + lt = 0; + } + else{ + D->addCell(value); + } + } + else if(itA->second < itB->second){ + PCell value(itB->second, itA->second, distance); + + if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol... + refRow = itA->second; + refCol = itB->second; + D->addCell(value); + } + else if(refRow == itB->second && refCol == itA->second){ + lt = 0; + } + else{ + D->addCell(value); + } + } + reading->update(itA->second * nseqs); + } + gobble(fileHandle); + } + + if(lt == 0){ // oops, it was square + fileHandle.close(); //let's start over + D->clear(); //let's start over + + openInputFile(distFile, fileHandle); //let's start over + + while(fileHandle){ + fileHandle >> firstName >> secondName >> distance; + + map::iterator itA = nameMap->find(firstName); + map::iterator itB = nameMap->find(secondName); + + if(itA == nameMap->end()){ + cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; + } + if(itB == nameMap->end()){ + cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; + } + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff && itA->second > itB->second){ + PCell value(itA->second, itB->second, distance); + D->addCell(value); + reading->update(itA->second * nseqs); + } + + gobble(fileHandle); + } + } + + reading->finish(); + fileHandle.close(); + + list->setLabel("0"); + + } + catch(exception& e) { + errorOut(e, "ReadColumnMatrix", "read"); + exit(1); + } +} + +/***********************************************************************/ + +ReadColumnMatrix::~ReadColumnMatrix(){ + //delete D; + //delete list; +} + + diff --git a/readcolumn.h b/readcolumn.h index fea6f59..069632c 100644 --- a/readcolumn.h +++ b/readcolumn.h @@ -22,6 +22,7 @@ public: private: ifstream fileHandle; string distFile; + }; /******************************************************/ diff --git a/readdistcommand.cpp b/readdistcommand.cpp index 263d57b..c4756c3 100644 --- a/readdistcommand.cpp +++ b/readdistcommand.cpp @@ -142,7 +142,10 @@ void ReadDistCommand::help(){ ReadDistCommand::~ReadDistCommand(){ if (abort == false) { - if (format != "matrix") { delete read; delete nameMap; } + if (format != "matrix") { + delete read; + delete nameMap; + } } } @@ -174,7 +177,7 @@ int ReadDistCommand::execute(){ if (globaldata->gSparseMatrix != NULL) { delete globaldata->gSparseMatrix; } globaldata->gSparseMatrix = read->getMatrix(); numDists = globaldata->gSparseMatrix->getNNodes(); - + int lines = cutoff / (1.0/precision); vector dist_cutoff(lines+1,0); for (int i = 0; i <= lines;i++) { diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index 4b267b5..d5e917c 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -206,6 +206,14 @@ void ScreenSeqsCommand::screenNameGroupFile(set badSeqNames){ goodNameOut.close(); badNameOut.close(); + //we were unable to remove some of the bad sequences + if (badSeqNames.size() != 0) { + for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) { + mothurOut("Your namefile does not include the sequence " + *it + " please correct."); + mothurOutEndLine(); + } + } + if(groupfile != ""){ ifstream inputGroups; @@ -234,6 +242,14 @@ void ScreenSeqsCommand::screenNameGroupFile(set badSeqNames){ inputGroups.close(); goodGroupOut.close(); badGroupOut.close(); + + //we were unable to remove some of the bad sequences + if (badSeqGroups.size() != 0) { + for (it = badSeqGroups.begin(); it != badSeqGroups.end(); it++) { + mothurOut("Your namefile does not include the sequence " + *it + " please correct."); + mothurOutEndLine(); + } + } } } @@ -265,6 +281,15 @@ void ScreenSeqsCommand::screenGroupFile(set badSeqNames){ } gobble(inputGroups); } + + //we were unable to remove some of the bad sequences + if (badSeqNames.size() != 0) { + for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) { + mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); + mothurOutEndLine(); + } + } + inputGroups.close(); goodGroupOut.close(); badGroupOut.close(); @@ -312,6 +337,15 @@ void ScreenSeqsCommand::screenAlignReport(set badSeqNames){ } gobble(inputAlignReport); } + + //we were unable to remove some of the bad sequences + if (badSeqNames.size() != 0) { + for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) { + mothurOut("Your file does not include the sequence " + *it + " please correct."); + mothurOutEndLine(); + } + } + inputAlignReport.close(); goodAlignReportOut.close(); badAlignReportOut.close(); diff --git a/sharedcommand.cpp b/sharedcommand.cpp index 62d8b3a..727a04b 100644 --- a/sharedcommand.cpp +++ b/sharedcommand.cpp @@ -52,6 +52,7 @@ int SharedCommand::execute(){ //lookup.clear(); string errorOff = "no error"; + //errorOff = ""; //read in listfile read = new ReadOTUFile(globaldata->inputFileName); @@ -83,8 +84,7 @@ int SharedCommand::execute(){ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; - set userLabels = globaldata->labels; - + set userLabels = globaldata->labels; while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) { @@ -102,6 +102,8 @@ int SharedCommand::execute(){ } if ((anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = SharedList->getLabel(); + delete SharedList; SharedList = input->getSharedListVector(lastLabel); //get new list vector to process @@ -113,6 +115,9 @@ int SharedCommand::execute(){ processedLabels.insert(SharedList->getLabel()); userLabels.erase(SharedList->getLabel()); + + //restore real lastlabel to save below + SharedList->setLabel(saveLabel); } @@ -166,6 +171,7 @@ void SharedCommand::printSharedData(vector thislookup) { //initialize bin values for (int i = 0; i < thislookup.size(); i++) { +//cout << "in printData " << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << endl; out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t'; thislookup[i]->print(out); diff --git a/slayer.cpp b/slayer.cpp index 95734c7..8e9eb56 100644 --- a/slayer.cpp +++ b/slayer.cpp @@ -45,7 +45,7 @@ string Slayer::getResults(Sequence* query, vector refSeqs) { float snpRateLeft = numSNPSLeft / (float) winSizeLeft; float snpRateRight = numSNPSRight / (float) winSizeRight; - float logR = log(snpRateLeft / snpRateRight) / log(2); + float logR = log(snpRateLeft / snpRateRight) / log(2.0); // do not accept excess snp ratio on either side of the break if (abs(logR) < 1 ) { diff --git a/sparsematrix.cpp b/sparsematrix.cpp index 8a873f7..7477bd9 100644 --- a/sparsematrix.cpp +++ b/sparsematrix.cpp @@ -153,7 +153,7 @@ PCell* SparseMatrix::getSmallestCell(){ } } -// random_shuffle(mins.begin(), mins.end()); //randomize the order of the iterators in the mins vector + random_shuffle(mins.begin(), mins.end()); //randomize the order of the iterators in the mins vector for(int i=0;ivectorMap = &mins[i]; //assign vectorMap to the address for the container diff --git a/summarycommand.cpp b/summarycommand.cpp index a2f79f9..51fabf8 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -236,6 +236,8 @@ int SummaryCommand::execute(){ } if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = sabund->getLabel(); + delete sabund; sabund = input->getSAbundVector(lastLabel); @@ -250,6 +252,9 @@ int SummaryCommand::execute(){ sumCalculators[i]->print(outputFileHandle); } outputFileHandle << endl; + + //restore real lastlabel to save below + sabund->setLabel(saveLabel); } lastLabel = sabund->getLabel(); diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index ef0485c..575c0de 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -267,6 +267,8 @@ int SummarySharedCommand::execute(){ } if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input->getSharedRAbundVectors(lastLabel); @@ -275,9 +277,10 @@ int SummarySharedCommand::execute(){ processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); } - - lastLabel = lookup[0]->getLabel(); diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index e15d56b..c115c54 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -443,6 +443,8 @@ void TreeGroupCommand::makeSimsShared() { } if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input->getSharedRAbundVectors(lastLabel); @@ -451,6 +453,9 @@ void TreeGroupCommand::makeSimsShared() { processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); } lastLabel = lookup[0]->getLabel(); diff --git a/venncommand.cpp b/venncommand.cpp index b6a53e6..e26dbcc 100644 --- a/venncommand.cpp +++ b/venncommand.cpp @@ -223,6 +223,8 @@ int VennCommand::execute(){ } if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input->getSharedRAbundVectors(lastLabel); @@ -235,6 +237,9 @@ int VennCommand::execute(){ for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor } venn->getPic(lookup, vennCalculators); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); } @@ -293,6 +298,8 @@ int VennCommand::execute(){ } if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = sabund->getLabel(); + delete sabund; sabund = input->getSAbundVector(lastLabel); @@ -301,6 +308,9 @@ int VennCommand::execute(){ processedLabels.insert(sabund->getLabel()); userLabels.erase(sabund->getLabel()); + + //restore real lastlabel to save below + sabund->setLabel(saveLabel); } lastLabel = sabund->getLabel(); -- 2.39.2