]> git.donarmstrong.com Git - mothur.git/commitdiff
added hcluster command and fixed some bugs, namely one with smart distancing.
authorwestcott <westcott>
Thu, 29 Oct 2009 13:56:37 +0000 (13:56 +0000)
committerwestcott <westcott>
Thu, 29 Oct 2009 13:56:37 +0000 (13:56 +0000)
38 files changed:
Mothur.xcodeproj/project.pbxproj
binsequencecommand.cpp
bootstrapsharedcommand.cpp
clustercommand.cpp
collectcommand.cpp
collectsharedcommand.cpp
commandfactory.cpp
getlistcountcommand.cpp
getoturepcommand.cpp
getrabundcommand.cpp
getsabundcommand.cpp
getsharedotucommand.cpp
globaldata.cpp
globaldata.hpp
hcluster.cpp [new file with mode: 0644]
hcluster.h [new file with mode: 0644]
hclustercommand.cpp [new file with mode: 0644]
hclustercommand.h [new file with mode: 0644]
heatmapcommand.cpp
heatmapsimcommand.cpp
matrixoutputcommand.cpp
nameassignment.cpp
nameassignment.hpp
rarefactcommand.cpp
rarefactsharedcommand.cpp
readcluster.cpp [new file with mode: 0644]
readcluster.h [new file with mode: 0644]
readcolumn.cpp
readcolumn.h
readdistcommand.cpp
screenseqscommand.cpp
sharedcommand.cpp
slayer.cpp
sparsematrix.cpp
summarycommand.cpp
summarysharedcommand.cpp
treegroupscommand.cpp
venncommand.cpp

index fa7c67251436f8c7c3997bf2a862de1eb1166bda..252c02eef75714e8610af6e8a5a7403a0b07bdc4 100644 (file)
                A70B53AC0F4CD7AD0064797E /* getlinecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */; };
                A70DECD91063D8B40057C03C /* secondarystructurecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */; };
                A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */; };
+               A729ACD010848E6100139801 /* hclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A729ACCF10848E6100139801 /* hclustercommand.cpp */; };
+               A729ACE410849BBE00139801 /* hcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A729ACE310849BBE00139801 /* hcluster.cpp */; };
                A73329CF1083B3B3003B10C5 /* getlistcountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */; };
+               A751ACEA1098B283003D0911 /* readcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A751ACE91098B283003D0911 /* readcluster.cpp */; };
                A75B887E104C16860083C454 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B887B104C16860083C454 /* ccode.cpp */; };
                A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */; };
                A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0450D106CEEC90046FC83 /* slayer.cpp */; };
                A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = secondarystructurecommand.cpp; sourceTree = SOURCE_ROOT; };
                A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckrdp.h; sourceTree = SOURCE_ROOT; };
                A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeracheckrdp.cpp; sourceTree = SOURCE_ROOT; };
-               A73329CD1083B3B3003B10C5 /* getlistcountcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlistcountcommand.h; sourceTree = "<group>"; };
-               A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getlistcountcommand.cpp; sourceTree = "<group>"; };
+               A729ACCE10848E6100139801 /* hclustercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hclustercommand.h; sourceTree = SOURCE_ROOT; };
+               A729ACCF10848E6100139801 /* hclustercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hclustercommand.cpp; sourceTree = SOURCE_ROOT; };
+               A729ACE210849BBE00139801 /* hcluster.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hcluster.h; sourceTree = SOURCE_ROOT; };
+               A729ACE310849BBE00139801 /* hcluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hcluster.cpp; sourceTree = SOURCE_ROOT; };
+               A73329CD1083B3B3003B10C5 /* getlistcountcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlistcountcommand.h; sourceTree = SOURCE_ROOT; };
+               A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getlistcountcommand.cpp; sourceTree = SOURCE_ROOT; };
+               A751ACE81098B283003D0911 /* readcluster.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readcluster.h; sourceTree = SOURCE_ROOT; };
+               A751ACE91098B283003D0911 /* readcluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readcluster.cpp; sourceTree = SOURCE_ROOT; };
                A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; };
                A75B887C104C16860083C454 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ccode.h; sourceTree = SOURCE_ROOT; };
                A7B04491106CC3E60046FC83 /* chimeraslayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayer.h; sourceTree = SOURCE_ROOT; };
                                374F2FE81006346600E97C66 /* chimera */,
                                37D927C20F21331F001D4494 /* cluster.hpp */,
                                37D927C10F21331F001D4494 /* cluster.cpp */,
+                               A729ACE210849BBE00139801 /* hcluster.h */,
+                               A729ACE310849BBE00139801 /* hcluster.cpp */,
                                37D928A90F2133E5001D4494 /* commands */,
                                37D927C60F21331F001D4494 /* collect.h */,
                                37D927C50F21331F001D4494 /* collect.cpp */,
                                7E412FE90F8D3E2C00381DD0 /* libshuff.cpp */,
                                37E3ED800F4D9D0D00B5DF02 /* mothur.h */,
                                37D927EF0F21331F001D4494 /* mothur.cpp */,
-                               37D927F10F21331F001D4494 /* nameassignment.hpp */,
-                               37D927F00F21331F001D4494 /* nameassignment.cpp */,
                                373C691E0FC1C98600137ACD /* nast.hpp */,
                                373C691D0FC1C98600137ACD /* nast.cpp */,
                                373C692A0FC1C9EB00137ACD /* nastreport.hpp */,
                3796441D0FB9B9650081FDB6 /* read */ = {
                        isa = PBXGroup;
                        children = (
+                               A751ACE81098B283003D0911 /* readcluster.h */,
+                               A751ACE91098B283003D0911 /* readcluster.cpp */,
                                375AA1340F9E433D008EF9B8 /* readcolumn.h */,
                                375AA1330F9E433D008EF9B8 /* readcolumn.cpp */,
                                37D928130F21331F001D4494 /* readmatrix.hpp */,
                                37B73CA51004D89A008C4B41 /* getseqscommand.cpp */,
                                A7E4A781106913F900688F62 /* getsharedotucommand.h */,
                                A7E4A782106913F900688F62 /* getsharedotucommand.cpp */,
+                               A729ACCE10848E6100139801 /* hclustercommand.h */,
+                               A729ACCF10848E6100139801 /* hclustercommand.cpp */,
                                375873F10F7D64800040F377 /* heatmapcommand.h */,
                                375873F00F7D64800040F377 /* heatmapcommand.cpp */,
                                378598640FDD497000EF9D03 /* heatmapsimcommand.h */,
                                37D927EB0F21331F001D4494 /* kmerdb.cpp */,
                                37D927EE0F21331F001D4494 /* listvector.hpp */,
                                37D927ED0F21331F001D4494 /* listvector.cpp */,
+                               37D927F10F21331F001D4494 /* nameassignment.hpp */,
+                               37D927F00F21331F001D4494 /* nameassignment.cpp */,
                                37D927F80F21331F001D4494 /* ordervector.hpp */,
                                37D927F70F21331F001D4494 /* ordervector.cpp */,
                                37D928000F21331F001D4494 /* rabundvector.hpp */,
                                A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */,
                                A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */,
                                A73329CF1083B3B3003B10C5 /* getlistcountcommand.cpp in Sources */,
+                               A729ACD010848E6100139801 /* hclustercommand.cpp in Sources */,
+                               A729ACE410849BBE00139801 /* hcluster.cpp in Sources */,
+                               A751ACEA1098B283003D0911 /* readcluster.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index 08ae6b607a13e3406aa4fe42d8a1f5025a41bbc0..ad90ff45202213fcb88cb222004628a82930ba80 100644 (file)
@@ -166,6 +166,8 @@ int BinSeqCommand::execute(){
                        }
                        
                        if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = list->getLabel();
+                               
                                delete list;
                                list = input->getListVector(lastLabel);
                                
@@ -174,6 +176,9 @@ int BinSeqCommand::execute(){
                                                                                                        
                                processedLabels.insert(list->getLabel());
                                userLabels.erase(list->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               list->setLabel(saveLabel);
                        }
                        
                        lastLabel = list->getLabel();                   
index 0f758c15a0eafc98d620d2fb37980be6f221a767..1b682c78e3471fe7b4fef8528e8d74243374cd21 100644 (file)
@@ -224,6 +224,7 @@ int BootSharedCommand::execute(){
                        
                        //you have a label the user want that is smaller than this label and the last label has not already been processed
                        if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = order->getLabel();
                                
                                delete order;
                                order = input->getSharedOrderVector(lastLabel);                                                                                                 
@@ -232,6 +233,9 @@ int BootSharedCommand::execute(){
 
                                processedLabels.insert(order->getLabel());
                                userLabels.erase(order->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               order->setLabel(saveLabel);
                        }
                        
                        
index d3b9bbf5a45d182e24b3ff8cc8bc45f6fcbc896a..8208b7efbde6148c4d5c6c0ad0d090335edd6d0c 100644 (file)
@@ -71,7 +71,8 @@ ClusterCommand::ClusterCommand(string option){
                        
                        if (abort == false) {
                        
-                               //get matrix, list and rabund for execute
+       
+                                                       //get matrix, list and rabund for execute
                                if(globaldata->gSparseMatrix != NULL)   {       matrix = globaldata->gSparseMatrix;             }
                        
                                if(globaldata->gListVector != NULL){
index 7d3daef96cccf6d7b1604728fd1eaff9f2b78048..ea2d558544514e7f35e291033c23e20aca930a6a 100644 (file)
@@ -221,6 +221,7 @@ int CollectCommand::execute(){
                        }
                        //you have a label the user want that is smaller than this label and the last label has not already been processed 
                        if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = order->getLabel();
                                
                                delete order;
                                order = (input->getOrderVector(lastLabel));
@@ -232,6 +233,9 @@ int CollectCommand::execute(){
                                mothurOut(order->getLabel()); mothurOutEndLine();
                                processedLabels.insert(order->getLabel());
                                userLabels.erase(order->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               order->setLabel(saveLabel);
                        }
                        
                        lastLabel = order->getLabel();  
index 18faa2d699333bade9ce3f492b52a1dbe2eb10a2..844b138c1916d5be2cffe199ace4639b56945bd9 100644 (file)
@@ -244,6 +244,8 @@ int CollectSharedCommand::execute(){
                        
                        //you have a label the user want that is smaller than this label and the last label has not already been processed
                        if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = order->getLabel();
+                               
                                delete order;
                                order = input->getSharedOrderVector(lastLabel);
                                
@@ -255,6 +257,9 @@ int CollectSharedCommand::execute(){
                                mothurOut(order->getLabel()); mothurOutEndLine();
                                processedLabels.insert(order->getLabel());
                                userLabels.erase(order->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               order->setLabel(saveLabel);
                        }
                        
                        
index 9adf6791505060d7379fbfc387e1ce6f5d9f5afd..5f07abb59e03e161bd46bc60065eb15e37107a06 100644 (file)
@@ -57,6 +57,7 @@
 #include "secondarystructurecommand.h"
 #include "getsharedotucommand.h"
 #include "getlistcountcommand.h"
+#include "hclustercommand.h"
 
 /***********************************************************/
 
@@ -113,6 +114,8 @@ CommandFactory::CommandFactory(){
        commands["get.sharedotu"]               = "get.sharedotu";
        commands["get.listcount"]               = "get.listcount";
        commands["quit"]                                = "quit"; 
+       
+       commands["hcluster"]                    = "hcluster"; 
 
 }
 /***********************************************************/
@@ -177,6 +180,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "align.check")                   {       command = new AlignCheckCommand(optionString);                  }
                else if(commandName == "get.sharedotu")                 {       command = new GetSharedOTUCommand(optionString);                }
                else if(commandName == "get.listcount")                 {       command = new GetListCountCommand(optionString);                }
+               
+               else if(commandName == "hcluster")                              {       command = new HClusterCommand(optionString);                    }
                else                                                                                    {       command = new NoCommand(optionString);                                  }
 
                return command;
index f14e17a951eec96494dd36a87d259f6117cc758c..94437fddfa766efe0bf25233914fe59befd9f204 100644 (file)
@@ -120,6 +120,8 @@ int GetListCountCommand::execute(){
                        }
                        
                        if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = list->getLabel();
+                               
                                delete list;
                                list = input->getListVector(lastLabel);
                                
@@ -127,6 +129,9 @@ int GetListCountCommand::execute(){
                                                                                                        
                                processedLabels.insert(list->getLabel());
                                userLabels.erase(list->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               list->setLabel(saveLabel);
                        }
                        
                        lastLabel = list->getLabel();                   
index 07486191e886c72d5d2aafd1db781bad96411874..e9bed002a11637f7ca1e4279b0a14690f3fd40b6 100644 (file)
@@ -199,6 +199,8 @@ int GetOTURepCommand::execute(){
                        }
                        
                        if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                                       string saveLabel = list->getLabel();
+                                       
                                        delete list;
                                        list = input->getListVector(lastLabel);
                                        mothurOut(list->getLabel() + "\t" + toString(list->size())); mothurOutEndLine();
@@ -207,6 +209,9 @@ int GetOTURepCommand::execute(){
                                        
                                        processedLabels.insert(list->getLabel());
                                        userLabels.erase(list->getLabel());
+                                       
+                                       //restore real lastlabel to save below
+                                       list->setLabel(saveLabel);
                        }
                        
                        lastLabel = list->getLabel();
index 04b1b0badcf7dfc009296ac145aa49da50a3de50..2ed5b7be71e13b5369f4cc71aaab72258960d41f 100644 (file)
@@ -133,6 +133,8 @@ int GetRAbundCommand::execute(){
                        }
                        
                        if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                                       string saveLabel = list->getLabel();
+                                       
                                        delete list;
                                        list = input->getListVector(lastLabel);
                                        
@@ -147,6 +149,9 @@ int GetRAbundCommand::execute(){
 
                                        processedLabels.insert(list->getLabel());
                                        userLabels.erase(list->getLabel());
+                                       
+                                       //restore real lastlabel to save below
+                                       list->setLabel(saveLabel);
                        }
                        
                        lastLabel = list->getLabel();           
index 80a44297b48bfde50c633edfe7354e2f3e0cb115..3dd58c117945a299154ebd09cc60b0688ea9f714 100644 (file)
@@ -123,6 +123,8 @@ int GetSAbundCommand::execute(){
                        }
                        
                        if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                                       string saveLabel = order->getLabel();
+                                       
                                        delete order;           
                                        order = (input->getOrderVector(lastLabel));
                                        
@@ -134,6 +136,9 @@ int GetSAbundCommand::execute(){
 
                                        processedLabels.insert(order->getLabel());
                                        userLabels.erase(order->getLabel());
+                                       
+                                       //restore real lastlabel to save below
+                                       order->setLabel(saveLabel);
                        }
                        
                        
index 3def8bc54f8b5a448a2023fb708eccd4e7f086f9..56f030d235212dec72677e15431fef6a6edbcad9 100644 (file)
@@ -92,7 +92,7 @@ void GetSharedOTUCommand::help(){
                mothurOut("With this option you can use the names file as an input in get.seqs and remove.seqs commands. To do this enter output=accnos. \n");
                mothurOut("The get.sharedotu command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n");
                mothurOut("The get.sharedotu command should be in the following format: get.sabund(label=yourLabels, groups=yourGroups, fasta=yourFastafile, output=yourOutput).\n");
-               mothurOut("Example get.sharedotu(label=unique-0.01, group=forest-pasture, fasta=amazon.fasta, output=accnos).\n");
+               mothurOut("Example get.sharedotu(list=amazon.fn.list, label=unique-0.01, group=forest-pasture, fasta=amazon.fasta, output=accnos).\n");
                mothurOut("The default value for label is all labels in your inputfile. The default for groups is all groups in your file.\n");
                mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
        }
@@ -161,12 +161,16 @@ int GetSharedOTUCommand::execute(){
                        }
                        
                        if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                                       string saveLabel = list->getLabel();
                                        
                                        mothurOut(lastlist->getLabel()); 
                                        process(lastlist);
                                        
                                        processedLabels.insert(lastlist->getLabel());
                                        userLabels.erase(lastlist->getLabel());
+                                       
+                                       //restore real lastlabel to save below
+                                       list->setLabel(saveLabel);
                        }
 
                        lastLabel = list->getLabel();
index 690a8c3400c802844dc71c49f13f898a0e822e0e..c2871694e2feaa9219ed5d31575e57a84a3022dc 100644 (file)
@@ -27,12 +27,10 @@ string GlobalData::getNameFile()            {       return namefile;                }
 string GlobalData::getGroupFile()              {       return groupfile;               }
 string GlobalData::getOrderFile()              {       return orderfile;               }
 string GlobalData::getTreeFile()               {       return treefile;                }
-string GlobalData::getSharedFile()             {       return sharedfile;              }
-//string GlobalData::getFastaFile()            {       return fastafile;               }       
+string GlobalData::getSharedFile()             {       return sharedfile;              }       
 string GlobalData::getFormat()                 {       return format;                  }
 
 void GlobalData::setListFile(string file)              {       listfile = file;        inputFileName = file;                                   }
-//void GlobalData::setFastaFile(string file)           {       fastafile = file;       inputFileName = file;                                   }
 void GlobalData::setTreeFile(string file)              {       treefile = file;        inputFileName = file;                                   }
 void GlobalData::setRabundFile(string file)            {       rabundfile = file;      inputFileName = file;                                   }
 void GlobalData::setSabundFile(string file)            {       sabundfile = file;      inputFileName = file;                                   }
@@ -63,6 +61,7 @@ GlobalData::GlobalData() {
        gMatrix = NULL;
        gTreemap = NULL;
        gSequenceDB = NULL;
+       nameMap = NULL;
 }
 /*******************************************************/
 
@@ -115,6 +114,8 @@ void GlobalData::newRead() {
                        if (gTreemap != NULL) { delete gTreemap; gTreemap = NULL; }
 
                        if (gSequenceDB != NULL) { delete gSequenceDB; gSequenceDB = NULL;}
+                       
+                       if (nameMap != NULL) { delete nameMap; nameMap = NULL; }
 
 
                        gTree.clear();
@@ -147,6 +148,7 @@ GlobalData::~GlobalData() {
                if (gMatrix != NULL) { delete gMatrix; gMatrix = NULL;}
                if (gTreemap != NULL) { delete gTreemap; gTreemap = NULL; }
                if (gSequenceDB != NULL) { delete gSequenceDB; gSequenceDB = NULL;}
+               if (nameMap != NULL) { delete nameMap; nameMap = NULL; }
        }
        catch(exception& e) {
                errorOut(e, "GlobalData", "~GlobalData");
index e125e6f3d7032d78d2b1330dece6cf909a6bf320..0238a0aaeffa7a7e710182af82010ea058edf1df 100644 (file)
@@ -10,6 +10,7 @@
 #include "tree.h"
 #include "sparsematrix.hpp"
 #include "sequencedb.h"
+#include "nameassignment.hpp"
 
 
 class ListVector;
@@ -73,6 +74,8 @@ public:
        void setOrderFile(string file);
        void setFormat(string); //do we need this?
        
+       NameAssignment* nameMap;
+       
        void clear(); 
        void clearLabels();
        void clearAbund();
@@ -81,7 +84,7 @@ public:
        
 private:
 
-       string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, treefile, sharedfile, format;
+       string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, treefile, sharedfile, format, distfile;
 
        static GlobalData* _uniqueInstance;
        GlobalData( const GlobalData& ); // Disable copy constructor
diff --git a/hcluster.cpp b/hcluster.cpp
new file mode 100644 (file)
index 0000000..df36260
--- /dev/null
@@ -0,0 +1,299 @@
+/*
+ *  hcluster.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 10/13/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "hcluster.h"
+#include "rabundvector.hpp"
+#include "listvector.hpp"
+#include "sparsematrix.hpp"
+
+/***********************************************************************/
+
+HCluster::HCluster(RAbundVector* rav, ListVector* lv) :  rabund(rav), list(lv){
+       try {
+       
+               numSeqs = list->getNumSeqs();
+               
+               //initialize cluster array
+               for (int i = 0; i < numSeqs; i++) {
+                       clusterNode temp(1, -1, i);
+                       clusterArray.push_back(temp);
+               }
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "HCluster", "HCluster");
+               exit(1);
+       }
+}
+/***********************************************************************/
+
+void HCluster::clusterBins(){
+       try {
+               //cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild);
+
+               rabund->set(clusterArray[smallCol].smallChild, rabund->get(clusterArray[smallRow].smallChild)+rabund->get(clusterArray[smallCol].smallChild));  
+               rabund->set(clusterArray[smallRow].smallChild, 0);      
+               rabund->setLabel(toString(smallDist));
+
+               //cout << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild) << endl;
+       }
+       catch(exception& e) {
+               errorOut(e, "HCluster", "clusterBins");
+               exit(1);
+       }
+
+
+}
+
+/***********************************************************************/
+
+void HCluster::clusterNames(){
+       try {
+               ///cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(clusterArray[smallRow].smallChild) << '\t' << list->get(clusterArray[smallCol].smallChild);
+
+               list->set(clusterArray[smallCol].smallChild, list->get(clusterArray[smallRow].smallChild)+','+list->get(clusterArray[smallCol].smallChild));
+               list->set(clusterArray[smallRow].smallChild, "");       
+               list->setLabel(toString(smallDist));
+       
+               //cout << '\t' << list->get(clusterArray[smallRow].smallChild) << '\t' << list->get(clusterArray[smallCol].smallChild) << endl;
+
+    }
+       catch(exception& e) {
+               errorOut(e, "HCluster", "clusterNames");
+               exit(1);
+       }
+
+}
+/***********************************************************************/
+int HCluster::getUpmostParent(int node){
+       try {
+               
+               while (clusterArray[node].parent != -1) {
+                       node = clusterArray[node].parent;
+               }
+               
+               return node;
+       }
+       catch(exception& e) {
+               errorOut(e, "HCluster", "getUpmostParent");
+               exit(1);
+       }
+}
+/***********************************************************************/
+void HCluster::printInfo(){
+       try {
+               
+               cout << "link table" << endl;
+               for (it = activeLinks.begin(); it!= activeLinks.end(); it++) {
+                       cout << it->first << " = " << it->second << endl;
+               }
+               cout << endl;
+               for (int i = 0; i < linkTable.size(); i++) {
+                       for (it = linkTable[i].begin(); it != linkTable[i].end(); it++) {
+                               cout << it->first << '\t' << it->second << '\t' << '\t';
+                       }
+                       cout << endl;
+               }
+               cout << endl << "clusterArray" << endl;
+               
+               for (int i = 0; i < clusterArray.size(); i++) {
+                       cout << i << '\t' << clusterArray[i].numSeq << '\t' << clusterArray[i].parent << '\t' << clusterArray[i].smallChild << endl;
+               }
+               cout << endl;
+               
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "HCluster", "getUpmostParent");
+               exit(1);
+       }
+}
+/***********************************************************************/
+int HCluster::makeActive() {
+       try {
+       
+               int linkValue = 1; 
+//cout << "active - here" << endl;             
+               it = activeLinks.find(smallRow);
+               it2 = activeLinks.find(smallCol);
+               
+               if ((it == activeLinks.end()) && (it2 == activeLinks.end())) { //both are not active so add them
+                       int size = linkTable.size();
+                       map<int, int> temp; map<int, int> temp2;
+                       
+                       //add link to eachother
+                       temp[smallRow] = 1;                                                     //         1    2
+                       temp2[smallCol] = 1;                                            // 1   0        1
+                                                                                                               // 2   1        0
+                       linkTable.push_back(temp);
+                       linkTable.push_back(temp2);
+                       
+                       //add to activeLinks
+                       activeLinks[smallRow] = size;
+                       activeLinks[smallCol] = size+1;
+//cout << "active - here1" << endl;
+               }else if ((it != activeLinks.end()) && (it2 == activeLinks.end())) {  //smallRow is active, smallCol is not
+                        int size = linkTable.size();
+                        int alreadyActiveRow = it->second;
+                        map<int, int> temp; 
+                       
+                       //add link to eachother
+                       temp[smallRow] = 1;                                                     //         6    2       3       5
+                       linkTable.push_back(temp);                                      // 6   0        1       2       0
+                       linkTable[alreadyActiveRow][smallCol] = 1;      // 2   1        0       1       1
+                                                                                                               // 3   2        1       0       0
+                                                                                                               // 5   0    1   0   0   
+                       //add to activeLinks
+                       activeLinks[smallCol] = size;
+//cout << "active - here2" << endl;                    
+               }else if ((it == activeLinks.end()) && (it2 != activeLinks.end())) {  //smallCol is active, smallRow is not
+                        int size = linkTable.size();
+                        int alreadyActiveCol = it2->second;
+                        map<int, int> temp; 
+                       
+                       //add link to eachother
+                       temp[smallCol] = 1;                                                     //         6    2       3       5
+                       linkTable.push_back(temp);                                      // 6   0        1       2       0
+                       linkTable[alreadyActiveCol][smallRow] = 1;      // 2   1        0       1       1
+                                                                                                               // 3   2        1       0       0
+                                                                                                               // 5   0    1   0   0   
+                       //add to activeLinks
+                       activeLinks[smallRow] = size;
+//cout << "active - here3" << endl;
+               }else { //both are active so add one
+                       int row = it->second;
+                       int col = it2->second;
+//cout << row << '\t' << col << endl;                  
+                       
+                       linkTable[row][smallCol]++;
+                       linkTable[col][smallRow]++;
+                       linkValue = linkTable[row][smallCol];
+//cout << "active - here4" << endl;
+               }
+               
+               return linkValue;
+       }
+       catch(exception& e) {
+               errorOut(e, "HCluster", "makeActive");
+               exit(1);
+       }
+}
+/***********************************************************************/
+void HCluster::updateArrayandLinkTable() {
+       try {
+               //if cluster was made update clusterArray and linkTable
+                       int size = clusterArray.size();
+                       
+                       //add new node
+                       clusterNode temp(clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq, -1, clusterArray[smallCol].smallChild);
+                       clusterArray.push_back(temp);
+                       
+                       //update child nodes
+                       clusterArray[smallRow].parent = size;
+                       clusterArray[smallCol].parent = size;
+                       
+                       //update linkTable by merging clustered rows and columns
+                       int rowSpot = activeLinks[smallRow];
+                       int colSpot = activeLinks[smallCol];
+       //cout << "here" << endl;               
+                       //fix old rows
+                       for (int i = 0; i < linkTable.size(); i++) {
+                               //check if they are in map
+                               it = linkTable[i].find(smallRow);
+                               it2 = linkTable[i].find(smallCol);
+                               
+                               if ((it!=linkTable[i].end()) && (it2!=linkTable[i].end())) { //they are both there
+                                       linkTable[i][size] = linkTable[i][smallRow]+linkTable[i][smallCol];
+                                       linkTable[i].erase(smallCol); //delete col row
+                                       linkTable[i].erase(smallRow); //delete col row
+                               }else if ((it==linkTable[i].end()) && (it2!=linkTable[i].end())) { //only col
+                                       linkTable[i][size] = linkTable[i][smallCol];
+                                       linkTable[i].erase(smallCol); //delete col 
+                               }else if ((it!=linkTable[i].end()) && (it2==linkTable[i].end())) { //only row
+                                       linkTable[i][size] = linkTable[i][smallRow];
+                                       linkTable[i].erase(smallRow); //delete col 
+                               }
+                       }
+       //printInfo();
+//cout << "here2" << endl;
+                       //merge their values
+                       for (it = linkTable[rowSpot].begin(); it != linkTable[rowSpot].end(); it++) {
+                               it2 = linkTable[colSpot].find(it->first);  //does the col also have this
+                               
+                               if (it2 == linkTable[colSpot].end()) { //not there so add it
+                                       linkTable[colSpot][it->first] = it->second;
+                               }else { //merge them
+                                       linkTable[colSpot][it->first] = it->second+it2->second;
+                               }
+                       }
+//cout << "here3" << endl;                     
+                       linkTable[colSpot].erase(size);
+                       linkTable.erase(linkTable.begin()+rowSpot);  //delete col
+       //printInfo();          
+                       //update activerows
+                       activeLinks.erase(smallRow);
+                       activeLinks.erase(smallCol);
+                       
+                       if(rowSpot>colSpot) {   activeLinks[size] = colSpot;    }
+                       else{   activeLinks[size] = colSpot-1;  }
+                       
+                       //adjust everybody elses spot since you deleted - time vs. space
+                       for (it = activeLinks.begin(); it != activeLinks.end(); it++) {
+                               if (it->second > rowSpot) {  activeLinks[it->first]--;  }
+                       }
+                       
+                       
+//cout << "here4" << endl;
+       
+       }
+       catch(exception& e) {
+               errorOut(e, "HCluster", "updateArrayandLinkTable");
+               exit(1);
+       }
+}
+/***********************************************************************/
+bool HCluster::update(int row, int col, float distance){
+       try {
+               
+               smallRow = row;
+               smallCol = col;
+               smallDist = distance;
+               bool clustered = false;
+               
+               //find upmost parent of row and col
+               smallRow = getUpmostParent(smallRow);
+               smallCol = getUpmostParent(smallCol);
+       //cout << "smallRow = " << smallRow << " smallCol = " << smallCol << endl;
+               
+               //are they active in the link table
+               int linkValue = makeActive(); //after this point this nodes info is active in linkTable
+       //printInfo();                  
+//cout << "linkValue = " << linkValue << " times = " << (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq) << endl;
+               //can we cluster???
+               if (linkValue == (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq)) {
+                       updateArrayandLinkTable();
+                       clusterBins();
+                       clusterNames();
+                       clustered = true;
+                       //printInfo();
+               }
+               
+               
+               return clustered;
+       }
+       catch(exception& e) {
+               errorOut(e, "HCluster", "update");
+               exit(1);
+       }
+}
+
+
+/***********************************************************************/
+
+
diff --git a/hcluster.h b/hcluster.h
new file mode 100644 (file)
index 0000000..2688c19
--- /dev/null
@@ -0,0 +1,69 @@
+#ifndef HCLUSTER_H
+#define HCLUSTER_H
+
+/*
+ *  hcluster.h
+ *  Mothur
+ *
+ *  Created by westcott on 10/13/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+#include "mothur.h"
+
+class RAbundVector;
+class ListVector;
+/************************************************************/
+struct clusterNode {
+       int numSeq;
+       int parent;
+       int smallChild; //used to make linkTable work with list and rabund
+       clusterNode(int num, int par, int kid) : numSeq(num), parent(par), smallChild(kid) {};
+};
+
+/***********************************************************************/
+class HCluster {
+       
+public:
+       HCluster(RAbundVector*, ListVector*);
+    bool update(int, int, float);
+       //string getTag();
+
+protected:     
+       void clusterBins();
+       void clusterNames();
+       int getUpmostParent(int);
+       int makeActive();
+       void printInfo();
+       void updateArrayandLinkTable();
+               
+       RAbundVector* rabund;
+       ListVector* list;
+       
+       vector<clusterNode> clusterArray;
+       vector< map<int, int> > linkTable;  // vector of maps - linkTable[1][6] = 2  would mean sequence in spot 1 has 2 links with sequence in 6
+       map<int, int> activeLinks;  //maps sequence to index in linkTable
+       map<int, int>::iterator it;
+       map<int, int>::iterator it2;
+       
+       int numSeqs;
+       
+       int smallRow;
+       int smallCol;
+       float smallDist;
+       
+};
+
+/***********************************************************************/
+
+
+
+
+
+
+
+#endif
+
+
diff --git a/hclustercommand.cpp b/hclustercommand.cpp
new file mode 100644 (file)
index 0000000..38ba4b6
--- /dev/null
@@ -0,0 +1,370 @@
+/*
+ *  hclustercommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 10/13/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "hclustercommand.h"
+
+//**********************************************************************************************************************
+//This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
+HClusterCommand::HClusterCommand(string option){
+       try{
+               globaldata = GlobalData::getInstance();
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"cutoff","precision","method","showabund","timing","phylip","column","name","sorted"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+               
+                       //check to make sure all parameters are valid for command
+                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
+                                       abort = true;
+                               }
+                       }
+                       
+                       globaldata->newRead();
+                       
+                       //check for required parameters
+                       phylipfile = validParameter.validFile(parameters, "phylip", true);
+                       if (phylipfile == "not open") { abort = true; }
+                       else if (phylipfile == "not found") { phylipfile = ""; }        
+                       else {  distfile = phylipfile;  format = "phylip";      }
+                       
+                       columnfile = validParameter.validFile(parameters, "column", true);
+                       if (columnfile == "not open") { abort = true; } 
+                       else if (columnfile == "not found") { columnfile = ""; }
+                       else {  distfile = columnfile; format = "column";       }
+                       
+                       namefile = validParameter.validFile(parameters, "name", true);
+                       if (namefile == "not open") { abort = true; }   
+                       else if (namefile == "not found") { namefile = ""; }
+                       
+                       if ((phylipfile == "") && (columnfile == "")) { mothurOut("When executing a cluster command you must enter a phylip or a column."); mothurOutEndLine(); abort = true; }
+                       else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When executing a cluster command you must enter ONLY ONE of the following: phylip or column."); mothurOutEndLine(); abort = true; }
+               
+                       if (columnfile != "") {
+                               if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
+                       }
+                       
+                       //check for optional parameter and set defaults
+                       // ...at some point should added some additional type checking...
+                       //get user cutoff and precision or use defaults
+                       string temp;
+                       temp = validParameter.validFile(parameters, "precision", false);
+                       if (temp == "not found") { temp = "100"; }
+                       //saves precision legnth for formatting below
+                       length = temp.length();
+                       convert(temp, precision); 
+                       
+                       temp = validParameter.validFile(parameters, "cutoff", false);
+                       if (temp == "not found") { temp = "10"; }
+                       convert(temp, cutoff); 
+                       cutoff += (5 / (precision * 10.0));
+                       
+                       method = validParameter.validFile(parameters, "method", false);
+                       if (method == "not found") { method = "nearest"; }
+                       
+                       if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
+                       else { mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); mothurOutEndLine(); abort = true; }
+
+                       showabund = validParameter.validFile(parameters, "showabund", false);
+                       if (showabund == "not found") { showabund = "T"; }
+                       
+                       sort = validParameter.validFile(parameters, "sorted", false);
+                       if (sort == "not found") { sort = "F"; }
+                       sorted = isTrue(sort);
+
+                       timing = validParameter.validFile(parameters, "timing", false);
+                       if (timing == "not found") { timing = "F"; }
+                       
+                               
+                       if (abort == false) {
+                                                                                       
+                               fileroot = getRootName(distfile);
+                               
+                               tag = "fn";  //until we figure out average and nearest methods
+                       
+                               openOutputFile(fileroot+ tag + ".sabund",       sabundFile);
+                               openOutputFile(fileroot+ tag + ".rabund",       rabundFile);
+                               openOutputFile(fileroot+ tag + ".list",         listFile);
+                       }
+               }
+       }
+       catch(exception& e) {
+               errorOut(e, "HClusterCommand", "HClusterCommand");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+void HClusterCommand::help(){
+       try {
+               mothurOut("The cluster command can only be executed after a successful read.dist command.\n");
+               mothurOut("The cluster command parameter options are method, cuttoff, precision, showabund and timing. No parameters are required.\n");
+               mothurOut("The cluster command should be in the following format: \n");
+               mothurOut("cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n");
+               mothurOut("The acceptable cluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n"); 
+       }
+       catch(exception& e) {
+               errorOut(e, "HClusterCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+HClusterCommand::~HClusterCommand(){}
+
+//**********************************************************************************************************************
+
+int HClusterCommand::execute(){
+       try {
+       
+               if (abort == true) {    return 0;       }
+               
+               if(namefile != ""){     
+                       globaldata->nameMap = new NameAssignment(namefile);
+                       globaldata->nameMap->readMap();
+               }else{
+                       globaldata->nameMap = NULL;
+               }
+               
+               if (!sorted) {
+                       read = new ReadCluster(distfile, cutoff);       
+                       read->setFormat(format);
+                       read->read(globaldata->nameMap);
+                       distfile = read->getOutputFile();
+               
+                       list = read->getListVector();
+                       delete read;
+               }else {
+                       list = new ListVector(globaldata->nameMap->getListVector());
+               }
+       
+               //list vector made by read contains all sequence names
+               if(list != NULL){
+                       rabund = new RAbundVector(list->getRAbundVector());
+               }else{
+                       mothurOut("Error: no list vector!"); mothurOutEndLine(); return 0;
+               }
+               
+               
+               
+               float previousDist = 0.00000;
+               float rndPreviousDist = 0.00000;
+               oldRAbund = *rabund;
+               oldList = *list;
+               
+               print_start = true;
+               start = time(NULL);
+               
+//cout << "here" << endl;      
+               ifstream in;
+               openInputFile(distfile, in);
+               string firstName, secondName;
+               float distance;
+               
+               cluster = new HCluster(rabund, list);
+               bool clusteredSomething;
+               vector<seqDist> seqs; seqs.resize(1); // to start loop
+               exitedBreak = false;  //lets you know if there is a distance stored in next
+               
+               while (seqs.size() != 0){
+               
+                       seqs = getSeqs(in);
+                       random_shuffle(seqs.begin(), seqs.end());
+                       
+                       if (seqs.size() == 0) { break; } //there are no more distances
+               
+                       for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
+
+                               if (print_start && isTrue(timing)) {
+                                       mothurOut("Clustering (" + tag + ") dist " + toString(distance) + "/" 
+                                                         + toString(roundDist(distance, precision)) 
+                                                         + "\t(precision: " + toString(precision) + ")");
+                                       cout.flush();
+                                       print_start = false;
+                               }
+                               
+       ///cout << "before cluster update" << endl;
+                               if (seqs[i].seq1 != seqs[i].seq2) {
+                                       clusteredSomething = cluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
+                                       
+                                       float rndDist = roundDist(seqs[i].dist, precision);
+                                       //cout << "after cluster update clusterSomething = " << clusteredSomething << " rndDist = " << rndDist << " rndPreviousDist = " << rndPreviousDist << endl;                     
+                                       
+                                       
+                                       if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
+                                               printData("unique");
+                                       }
+                                       else if((rndDist != rndPreviousDist)){
+                                               printData(toString(rndPreviousDist,  length-1));
+                                       }
+                                       
+                                       previousDist = seqs[i].dist;
+                                       rndPreviousDist = rndDist;
+                                       oldRAbund = *rabund;
+                                       oldList = *list;
+                               }
+                       }
+               }
+               
+               in.close();
+
+               if (print_start && isTrue(timing)) {
+                       //mothurOut("Clustering (" + tag + ") for distance " + toString(previousDist) + "/" + toString(rndPreviousDist) 
+                                        //+ "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")");
+                       cout.flush();
+                       print_start = false;
+               }
+       
+               if(previousDist <= 0.0000){
+                       printData("unique");
+               }
+               else if(rndPreviousDist<cutoff){
+                       printData(toString(rndPreviousDist, length-1));
+               }
+               
+               //delete globaldata's copy of the sparsematrix and listvector to free up memory
+               delete globaldata->gListVector;  globaldata->gListVector = NULL;
+               
+               //saves .list file so you can do the collect, rarefaction and summary commands without doing a read.list
+               if (globaldata->getFormat() == "phylip") { globaldata->setPhylipFile(""); }
+               else if (globaldata->getFormat() == "column") { globaldata->setColumnFile(""); }
+               
+               globaldata->setListFile(fileroot+ tag + ".list");
+               globaldata->setNameFile("");
+               globaldata->setFormat("list");
+               
+               sabundFile.close();
+               rabundFile.close();
+               listFile.close();
+               if (isTrue(timing)) {
+                       //mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster " + toString(ndist) + " distances"); mothurOutEndLine();
+               }
+               return 0;
+       }
+       catch(exception& e) {
+               errorOut(e, "HClusterCommand", "execute");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+void HClusterCommand::printData(string label){
+       try {
+               if (isTrue(timing)) {
+                       mothurOut("\tTime: " + toString(time(NULL) - start) + "\tsecs for " + toString(oldRAbund.getNumBins()) 
+                    + "\tclusters. Updates: " + toString(loops)); mothurOutEndLine();
+               }
+               print_start = true;
+               loops = 0;
+               start = time(NULL);
+
+               oldRAbund.setLabel(label);
+               if (isTrue(showabund)) {
+                       oldRAbund.getSAbundVector().print(cout);
+               }
+               oldRAbund.print(rabundFile);
+               oldRAbund.getSAbundVector().print(sabundFile);
+       
+               oldList.setLabel(label);
+               oldList.print(listFile);
+       }
+       catch(exception& e) {
+               errorOut(e, "HClusterCommand", "printData");
+               exit(1);
+       }
+
+
+}
+//**********************************************************************************************************************
+
+vector<seqDist> HClusterCommand::getSeqs(ifstream& filehandle){
+       try {
+               string firstName, secondName;
+               float distance, prevDistance;
+               vector<seqDist> sameSeqs;
+               prevDistance = -1;
+               
+               //if you are not at the beginning of the file
+               if (exitedBreak) { 
+                       sameSeqs.push_back(next);
+                       prevDistance = next.dist;
+                       exitedBreak = false;
+               }
+       
+               //get entry
+               while (filehandle) {
+                       
+                       filehandle >> firstName >> secondName >> distance;  
+//cout << firstName << '\t' << secondName << '\t' << distance << endl;
+                       gobble(filehandle);
+                       
+                       //save first one
+                       if (prevDistance == -1) { prevDistance = distance; }
+                       
+                       map<string,int>::iterator itA = globaldata->nameMap->find(firstName);
+                       map<string,int>::iterator itB = globaldata->nameMap->find(secondName);
+                       
+                       if(itA == globaldata->nameMap->end()){
+                               cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
+                       }
+                       if(itB == globaldata->nameMap->end()){
+                               cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
+                       }
+                       
+                       //using cutoff
+                       if (distance > cutoff) { break; }
+                       
+                       if (distance != -1) { //-1 means skip me
+                               
+                               //are the distances the same
+                               if (distance == prevDistance) { //save in vector
+                                       seqDist temp;
+                                       temp.seq1 = itA->second;
+                                       temp.seq2 = itB->second;
+                                       temp.dist = distance;
+                                       sameSeqs.push_back(temp);
+                                       exitedBreak = false;
+                                       //what about precision??
+                                       
+                               }else{ 
+                                       next.seq1 = itA->second;
+                                       next.seq2 = itB->second;
+                                       next.dist = distance;
+                                       exitedBreak = true;
+                                       break;
+                               }
+                               
+                       }
+               }
+
+               return sameSeqs;
+       }
+       catch(exception& e) {
+               errorOut(e, "HClusterCommand", "getSeqs");
+               exit(1);
+       }
+
+
+}
+
+//**********************************************************************************************************************
+
diff --git a/hclustercommand.h b/hclustercommand.h
new file mode 100644 (file)
index 0000000..7351cef
--- /dev/null
@@ -0,0 +1,80 @@
+#ifndef HCLUSTERCOMMAND_H
+#define HCLUSTERCOMMAND_H
+
+/*
+ *  hclustercommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 10/13/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "globaldata.hpp"
+#include "hcluster.h"
+#include "rabundvector.hpp"
+#include "sabundvector.hpp"
+#include "listvector.hpp"
+#include "readcluster.h"
+
+/******************************************************************/
+//This command is an implementation of the HCluster algorythmn described in 
+//ESPRIT: estimating species richness using large collections of 16S rRNA pyrosequences by
+//Yijun Sun1,2,*, Yunpeng Cai2, Li Liu1, Fahong Yu1, Michael L. Farrell3, William McKendree3 
+//and William Farmerie1 1 
+
+//Interdisciplinary Center for Biotechnology Research, 2Department of Electrical and Computer Engineering, 
+//University of Florida, Gainesville, FL 32610-3622 and 3Materials Technology Directorate, Air Force Technical 
+//Applications Center, 1030 S. Highway A1A, Patrick AFB, FL 32925-3002, USA 
+//Received January 28, 2009; Revised April 14, 2009; Accepted April 15, 2009 
+
+/******************************************************************/
+
+
+/************************************************************/
+struct seqDist {
+       int seq1;
+       int seq2;
+       float dist;
+};
+/************************************************************/
+class HClusterCommand : public Command {
+       
+public:
+       HClusterCommand(string);        
+       ~HClusterCommand();
+       int execute();  
+       void help();
+       
+private:
+       GlobalData* globaldata;
+       HCluster* cluster;
+       ListVector* list;
+       RAbundVector* rabund;
+       RAbundVector oldRAbund;
+       ListVector oldList;
+       ReadCluster* read;
+       
+       bool abort;
+
+       string method, fileroot, tag, distfile, format, phylipfile, columnfile, namefile, sort;
+       double cutoff;
+       string showabund, timing;
+       int precision, length;
+       ofstream sabundFile, rabundFile, listFile;
+       //ifstream in;
+       seqDist next;
+       bool exitedBreak, sorted;
+
+       bool print_start;
+       time_t start;
+       unsigned long loops;
+       
+       void printData(string label);
+       vector<seqDist> getSeqs(ifstream&);
+};
+
+/************************************************************/
+
+#endif
index 52395bb2a2613f24846e031f530519afbd2ed39c..1992ce593a7e65c2638f7d5ac0c5f39e4f50b5f9 100644 (file)
@@ -163,6 +163,8 @@ int HeatMapCommand::execute(){
                                }
                                
                                if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                                       string saveLabel = lookup[0]->getLabel();
+                               
                                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
                                        lookup = input->getSharedRAbundVectors(lastLabel);
                                        mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
@@ -171,6 +173,9 @@ int HeatMapCommand::execute(){
                                        
                                        processedLabels.insert(lookup[0]->getLabel());
                                        userLabels.erase(lookup[0]->getLabel());
+                                       
+                                       //restore real lastlabel to save below
+                                       lookup[0]->setLabel(saveLabel);
                                }
                                
                                lastLabel = lookup[0]->getLabel();
@@ -223,7 +228,8 @@ int HeatMapCommand::execute(){
                                }
                                
                                if ((anyLabelsToProcess(rabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
-
+                                       string saveLabel = rabund->getLabel();
+                                       
                                        delete rabund;
                                        rabund = input->getRAbundVector(lastLabel);
                                        mothurOut(rabund->getLabel()); mothurOutEndLine();
@@ -232,6 +238,9 @@ int HeatMapCommand::execute(){
                                        
                                        processedLabels.insert(rabund->getLabel());
                                        userLabels.erase(rabund->getLabel());
+                                       
+                                       //restore real lastlabel to save below
+                                       rabund->setLabel(saveLabel);
                                }               
                                
                                                                
index d88bec0147095a1ed8e57e14c7e771548f86ffe6..3803692be8bf9d0b1b02997d90863fbbba196665 100644 (file)
@@ -197,6 +197,7 @@ int HeatMapSimCommand::execute(){
                        }
                                
                        if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = lookup[0]->getLabel();
                        
                                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                                lookup = input->getSharedRAbundVectors(lastLabel);                              
@@ -206,6 +207,9 @@ int HeatMapSimCommand::execute(){
                                        
                                processedLabels.insert(lookup[0]->getLabel());
                                userLabels.erase(lookup[0]->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               lookup[0]->setLabel(saveLabel);
                        }
                                
                        //prevent memory leak
index 6ccbf35d9affe9d0ab345018ba2165fe80d37dd8..5069e0f07684391033c7c1e2b7595dca9b18e30f 100644 (file)
@@ -196,7 +196,8 @@ int MatrixOutputCommand::execute(){
                        }
                        
                        if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
-                       
+                               string saveLabel = lookup[0]->getLabel();
+                               
                                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                                lookup = input->getSharedRAbundVectors(lastLabel);
 
@@ -205,6 +206,9 @@ int MatrixOutputCommand::execute(){
                                
                                processedLabels.insert(lookup[0]->getLabel());
                                userLabels.erase(lookup[0]->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               lookup[0]->setLabel(saveLabel);
                        }
 
                        lastLabel = lookup[0]->getLabel();                      
index c31468c36cbd422a5d8aa72a99cbfc02f7abb5ff..925608659a4261fc6924d23b16bbcafb13ed3558 100644 (file)
@@ -39,6 +39,20 @@ void NameAssignment::readMap(){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+void NameAssignment::push_back(string name) {
+       try{
+       
+               int num = (*this).size();
+               (*this)[name] = num;
+               
+               list.push_back(name);
+       }
+       catch(exception& e) {
+               errorOut(e, "NameAssignment", "push_back");
+               exit(1);
+       }
+}
 
 //**********************************************************************************************************************
 
index e36a76396b96d3221cc77ce60f05b18f3202378e..a9bf06b423c5a263f6e48722e4a125982c4385da 100644 (file)
@@ -11,6 +11,7 @@ public:
        ListVector getListVector();
        int get(string);
        void print();
+       void push_back(string);
 private:
        ifstream fileHandle;
        ListVector list;
index 76d66023237c025b6f5a8b0216f9dcdcb85e0110..4417d5c7ae3ca1f186308790b63d085ab553cc5a 100644 (file)
@@ -195,6 +195,8 @@ int RareFactCommand::execute(){
                        }
                        
                        if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = order->getLabel();
+                               
                                delete order;
                                order = (input->getOrderVector(lastLabel));
                                
@@ -205,6 +207,9 @@ int RareFactCommand::execute(){
                                mothurOut(order->getLabel()); mothurOutEndLine();
                                processedLabels.insert(order->getLabel());
                                userLabels.erase(order->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               order->setLabel(saveLabel);
                        }
                        
                        lastLabel = order->getLabel();          
index 022ae65bf8530da954589881fbcfa811adba8082..83fb35ed883fbb748d8cb5a6191aa136c692ae06 100644 (file)
@@ -188,6 +188,8 @@ int RareFactSharedCommand::execute(){
                        }
                        
                        if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                                       string saveLabel = lookup[0]->getLabel();
+                       
                                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                                        lookup = input->getSharedRAbundVectors(lastLabel);
 
@@ -198,6 +200,9 @@ int RareFactSharedCommand::execute(){
 
                                        processedLabels.insert(lookup[0]->getLabel());
                                        userLabels.erase(lookup[0]->getLabel());
+                                       
+                                       //restore real lastlabel to save below
+                                       lookup[0]->setLabel(saveLabel);
                        }
                                
                        
diff --git a/readcluster.cpp b/readcluster.cpp
new file mode 100644 (file)
index 0000000..50787e9
--- /dev/null
@@ -0,0 +1,264 @@
+/*
+ *  readcluster.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 10/28/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "readcluster.h"
+
+/***********************************************************************/
+
+ReadCluster::ReadCluster(string distfile, float c){
+        distFile = distfile;
+               cutoff = c;
+}
+
+/***********************************************************************/
+
+void ReadCluster::read(NameAssignment* nameMap){
+       try {
+        
+               if (format == "phylip") { convertPhylip2Column(nameMap); }
+               else { list = new ListVector(nameMap->getListVector());  }
+               
+               createHClusterFile();
+                       
+       }
+       catch(exception& e) {
+               errorOut(e, "ReadCluster", "read");
+               exit(1);
+       }
+}
+/***********************************************************************/
+
+void ReadCluster::createHClusterFile(){
+       try {   
+               string outfile = getRootName(distFile) + "sorted.dist";
+               
+               //if you can, use the unix sort since its been optimized for years
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       string command = "sort -n -k +3 " + distFile + " -o " + outfile;
+                       system(command.c_str());
+               #else //you are stuck with my best attempt...
+                       //windows sort does not have a way to specify a column, only a character in the line
+                       //since we cannot assume that the distance will always be at the the same character location on each line
+                       //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back.
+               
+                       //read in file line by file and put distance first
+                       string tempDistFile = distFile + ".temp";
+                       ifstream input;
+                       ofstream output;
+                       openInputFile(distFile, input);
+                       openOutputFile(tempDistFile, output);
+
+                       string firstName, secondName;
+                       float dist;
+                       while (input) {
+                               input >> firstName >> secondName >> dist;
+                               output << dist << '\t' << firstName << '\t' << secondName << endl;
+                               gobble(input);
+                       }
+                       input.close();
+                       output.close();
+               
+       
+                       //sort using windows sort
+                       string tempOutfile = outfile + ".temp";
+                       string command = "sort " + tempDistFile + " /O " + tempOutfile;
+                       system(command.c_str());
+               
+                       //read in sorted file and put distance at end again
+                       ifstream input2;
+                       openInputFile(tempOutfile, input2);
+                       openOutputFile(outfile, output);
+               
+                       while (input2) {
+                               input2 >> dist >> firstName >> secondName;
+                               output << firstName << '\t' << secondName << '\t' << dist << endl;
+                               gobble(input2);
+                       }
+                       input2.close();
+                       output.close();
+               
+                       //remove temp files
+                       remove(tempDistFile.c_str());
+                       remove(tempOutfile.c_str());
+               #endif
+               
+               OutPutFile = outfile;
+       }
+       catch(exception& e) {
+               errorOut(e, "ReadCluster", "createHClusterFile");
+               exit(1);
+       }
+}
+
+
+/***********************************************************************/
+
+void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
+       try {   
+               //convert phylip file to column file
+               map<int, string> rowToName;
+               map<int, string>::iterator it;
+               
+               ifstream in;
+               ofstream out;
+               string tempFile = distFile + ".column.temp";
+               
+               openInputFile(distFile, in);
+               openOutputFile(tempFile, out);
+               
+               float distance;
+               int square, nseqs;
+               string name;
+               vector<string> matrixNames;
+        
+               in >> nseqs >> name;
+               rowToName[0] = name;
+               matrixNames.push_back(name);
+               
+               if(nameMap == NULL){
+                       list = new ListVector(nseqs);
+                       list->set(0, name);
+               }
+               else{
+                       list = new ListVector(nameMap->getListVector());
+                       if(nameMap->count(name)==0){        mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
+               }
+        
+               char d;
+               while((d=in.get()) != EOF){
+                       
+                       if(isalnum(d)){
+                               square = 1;
+                               in.putback(d);
+                               for(int i=0;i<nseqs;i++){
+                                       in >> distance;
+                               }
+                               break;
+                       }
+                       if(d == '\n'){
+                               square = 0;
+                               break;
+                       }
+               }
+        
+               if(square == 0){
+                                       
+                       for(int i=1;i<nseqs;i++){
+                               in >> name;
+                               rowToName[i] = name;
+                               matrixNames.push_back(name);
+                               
+                               //there's A LOT of repeated code throughout this method...
+                               if(nameMap == NULL){
+                                       list->set(i, name);
+                                       
+                                       for(int j=0;j<i;j++){
+                                               in >> distance;
+                                               
+                                               if (distance == -1) { distance = 1000000; }
+                                               
+                                               if(distance < cutoff){
+                                                       out << i << '\t' << j << '\t' << distance << endl;
+                                               }
+                                       }
+                                       
+                               }
+                               else{
+                                       if(nameMap->count(name)==0){        mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
+                                       
+                                       for(int j=0;j<i;j++){
+                                               in >> distance;
+                                               
+                                               if (distance == -1) { distance = 1000000; }
+                                               
+                                               if(distance < cutoff){
+                                                       out << i << '\t' << j << '\t' << distance << endl;
+                                               }
+                                       }
+                               }
+                       }
+               }
+               else{
+                       
+                       for(int i=1;i<nseqs;i++){
+                               in >> name;                
+                               rowToName[i] = name;
+                               matrixNames.push_back(name);
+                               
+                               if(nameMap == NULL){
+                                       list->set(i, name);
+                                       for(int j=0;j<nseqs;j++){
+                                               in >> distance;
+                                               
+                                               if (distance == -1) { distance = 1000000; }
+                                               
+                                               if(distance < cutoff && j < i){
+                                                       out << i << '\t' << j << '\t' << distance << endl;
+                                               }
+                                       }
+                                       
+                               }
+                               else{
+                                       if(nameMap->count(name)==0){        mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
+                                       
+                                       for(int j=0;j<nseqs;j++){
+                                               in >> distance;
+                        
+                                               if (distance == -1) { distance = 1000000; }
+                                               
+                                               if(distance < cutoff && j < i){
+                                                       out << i << '\t' << j << '\t' << distance << endl;
+                                               }
+                                               
+                                       }
+                               }
+                       }
+               }
+               
+               list->setLabel("0");
+               in.close();
+               out.close();
+               
+               if(nameMap == NULL){
+                       for(int i=0;i<matrixNames.size();i++){
+                               nameMap->push_back(matrixNames[i]);
+                       }
+               }
+               
+               ifstream in2;
+               ofstream out2;
+               
+               string outputFile = getRootName(distFile) + "column.dist";
+               openInputFile(tempFile, in2);
+               openOutputFile(outputFile, out2);
+               
+               int first, second;
+               float dist;
+               
+               while (in2) {
+                       in2 >> first >> second >> dist;
+                       out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
+                       gobble(in2);
+               }
+               in2.close();
+               out2.close();
+               
+               remove(tempFile.c_str());
+               distFile = outputFile;
+       }
+       catch(exception& e) {
+               errorOut(e, "ReadCluster", "convertPhylip2Column");
+               exit(1);
+       }
+}
+/***********************************************************************/
+
+ReadCluster::~ReadCluster(){}
+/***********************************************************************/
+
diff --git a/readcluster.h b/readcluster.h
new file mode 100644 (file)
index 0000000..c81b1dd
--- /dev/null
@@ -0,0 +1,43 @@
+#ifndef READCLUSTER_H
+#define READCLUSTER_H
+/*
+ *  readcluster.h
+ *  Mothur
+ *
+ *  Created by westcott on 10/28/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+#include "mothur.h"
+#include "nameassignment.hpp"
+#include "listvector.hpp"
+
+
+/******************************************************/
+
+class ReadCluster {
+       
+public:
+       ReadCluster(string, float);
+       ~ReadCluster();
+       void read(NameAssignment*);
+       string getOutputFile() { return OutPutFile; }
+       void setFormat(string f) { format = f;  }
+       ListVector* getListVector()             {       return list;    }
+       
+private:
+       string distFile;
+       string OutPutFile, format;
+       ListVector* list;
+       float cutoff;
+       
+       void createHClusterFile();
+       void convertPhylip2Column(NameAssignment*);
+};
+
+/******************************************************/
+
+#endif
+
index 29c967e0cbed51e375d21e0e755f14b7d1c7bd0d..fccc2cd06f8b045e48e4516229fc58d4ebe3e30d 100644 (file)
-/*
- *  readcolumn.cpp
- *  Mothur
- *
- *  Created by Sarah Westcott on 4/21/09.
- *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
- *
- */
-
-#include "readcolumn.h"
-#include "progress.hpp"
-
-/***********************************************************************/
-
-ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){
-       
-       successOpen = openInputFile(distFile, fileHandle);
-       
-}
-
-/***********************************************************************/
-
-void ReadColumnMatrix::read(NameAssignment* nameMap){
-       try {           
-
-               string firstName, secondName;
-               float distance;
-               int nseqs = nameMap->size();
-
-               list = new ListVector(nameMap->getListVector());
-       
-               Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);
-
-               int lt = 1;
-               int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
-               int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix
-
-               //need to see if this is a square or a triangular matrix...
-       
-               while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
-               
-                       fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
-       
-                       map<string,int>::iterator itA = nameMap->find(firstName);
-                       map<string,int>::iterator itB = nameMap->find(secondName);
-                       
-                       if(itA == nameMap->end()){
-                               cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
-                       }
-                       if(itB == nameMap->end()){
-                               cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
-                       }
-
-                       if (distance == -1) { distance = 1000000; }
-                       
-                       if(distance < cutoff && itA != itB){
-                               if(itA->second > itB->second){
-                                       PCell value(itA->second, itB->second, distance);
-                       
-                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
-                                               refRow = itA->second;
-                                               refCol = itB->second;
-                                               D->addCell(value);
-                                       }
-                                       else if(refRow == itA->second && refCol == itB->second){
-                                               lt = 0;
-                                       }
-                                       else{
-                                               D->addCell(value);
-                                       }
-                               }
-                               else if(itA->second < itB->second){
-                                       PCell value(itB->second, itA->second, distance);
-                       
-                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
-                                               refRow = itA->second;
-                                               refCol = itB->second;
-                                               D->addCell(value);
-                                       }
-                                       else if(refRow == itB->second && refCol == itA->second){
-                                               lt = 0;
-                                       }
-                                       else{
-                                               D->addCell(value);
-                                       }
-                               }
-                               reading->update(itA->second * nseqs);
-                       }
-                       gobble(fileHandle);
-               }
-
-               if(lt == 0){  // oops, it was square
-                       fileHandle.close();  //let's start over
-                       D->clear();  //let's start over
-                  
-                       openInputFile(distFile, fileHandle);  //let's start over
-
-                       while(fileHandle){
-                               fileHandle >> firstName >> secondName >> distance;
-               
-                               map<string,int>::iterator itA = nameMap->find(firstName);
-                               map<string,int>::iterator itB = nameMap->find(secondName);
-                               
-                               if(itA == nameMap->end()){
-                                       cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
-                               }
-                               if(itB == nameMap->end()){
-                                       cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
-                               }
-                               
-                               if (distance == -1) { distance = 1000000; }
-                               
-                               if(distance < cutoff && itA->second > itB->second){
-                                       PCell value(itA->second, itB->second, distance);
-                                       D->addCell(value);
-                                       reading->update(itA->second * nseqs);
-                               }
-               
-                               gobble(fileHandle);
-                       }
-               }
-
-               reading->finish();
-               fileHandle.close();
-
-               list->setLabel("0");
-
-       }
-       catch(exception& e) {
-               errorOut(e, "ReadColumnMatrix", "read");
-               exit(1);
-       }
-}
-
-/***********************************************************************/
-
-ReadColumnMatrix::~ReadColumnMatrix(){
-       //delete D;
-       //delete list;
-}
-
-
+/*\r
+ *  readcolumn.cpp\r
+ *  Mothur\r
+ *\r
+ *  Created by Sarah Westcott on 4/21/09.\r
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.\r
+ *\r
+ */\r
+\r
+#include "readcolumn.h"\r
+#include "progress.hpp"\r
+\r
+/***********************************************************************/\r
+\r
+ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){\r
+       \r
+       successOpen = openInputFile(distFile, fileHandle);\r
+       \r
+}\r
+\r
+/***********************************************************************/\r
+\r
+void ReadColumnMatrix::read(NameAssignment* nameMap){\r
+       try {           \r
+\r
+               string firstName, secondName;\r
+               float distance;\r
+               int nseqs = nameMap->size();\r
+\r
+               list = new ListVector(nameMap->getListVector());\r
+       \r
+               Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);\r
+\r
+               int lt = 1;\r
+               int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose\r
+               int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix\r
+\r
+               //need to see if this is a square or a triangular matrix...\r
+       \r
+               while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...\r
+               \r
+                       fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance\r
+       \r
+                       map<string,int>::iterator itA = nameMap->find(firstName);\r
+                       map<string,int>::iterator itB = nameMap->find(secondName);\r
+                       \r
+                       if(itA == nameMap->end()){\r
+                               cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n";\r
+                       }\r
+                       if(itB == nameMap->end()){\r
+                               cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n";\r
+                       }\r
+\r
+                       if (distance == -1) { distance = 1000000; }\r
+                       \r
+                       if(distance < cutoff && itA != itB){\r
+                               if(itA->second > itB->second){\r
+                                       PCell value(itA->second, itB->second, distance);\r
+                       \r
+                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...\r
+                                               refRow = itA->second;\r
+                                               refCol = itB->second;\r
+                                               D->addCell(value);\r
+                                       }\r
+                                       else if(refRow == itA->second && refCol == itB->second){\r
+                                               lt = 0;\r
+                                       }\r
+                                       else{\r
+                                               D->addCell(value);\r
+                                       }\r
+                               }\r
+                               else if(itA->second < itB->second){\r
+                                       PCell value(itB->second, itA->second, distance);\r
+                       \r
+                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...\r
+                                               refRow = itA->second;\r
+                                               refCol = itB->second;\r
+                                               D->addCell(value);\r
+                                       }\r
+                                       else if(refRow == itB->second && refCol == itA->second){\r
+                                               lt = 0;\r
+                                       }\r
+                                       else{\r
+                                               D->addCell(value);\r
+                                       }\r
+                               }\r
+                               reading->update(itA->second * nseqs);\r
+                       }\r
+                       gobble(fileHandle);\r
+               }\r
+\r
+               if(lt == 0){  // oops, it was square\r
+                       fileHandle.close();  //let's start over\r
+                       D->clear();  //let's start over\r
+                  \r
+                       openInputFile(distFile, fileHandle);  //let's start over\r
+\r
+                       while(fileHandle){\r
+                               fileHandle >> firstName >> secondName >> distance;\r
+               \r
+                               map<string,int>::iterator itA = nameMap->find(firstName);\r
+                               map<string,int>::iterator itB = nameMap->find(secondName);\r
+                               \r
+                               if(itA == nameMap->end()){\r
+                                       cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";\r
+                               }\r
+                               if(itB == nameMap->end()){\r
+                                       cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";\r
+                               }\r
+                               \r
+                               if (distance == -1) { distance = 1000000; }\r
+                               \r
+                               if(distance < cutoff && itA->second > itB->second){\r
+                                       PCell value(itA->second, itB->second, distance);\r
+                                       D->addCell(value);\r
+                                       reading->update(itA->second * nseqs);\r
+                               }\r
+               \r
+                               gobble(fileHandle);\r
+                       }\r
+               }\r
+\r
+               reading->finish();\r
+               fileHandle.close();\r
+\r
+               list->setLabel("0");\r
+\r
+       }\r
+       catch(exception& e) {\r
+               errorOut(e, "ReadColumnMatrix", "read");\r
+               exit(1);\r
+       }\r
+}\r
+\r
+/***********************************************************************/\r
+\r
+ReadColumnMatrix::~ReadColumnMatrix(){\r
+       //delete D;\r
+       //delete list;\r
+}\r
+\r
+\r
index fea6f5934c61a5519086402156369c45da594c20..069632c7267f93e3f95b0f52ba382975f255dbc7 100644 (file)
@@ -22,6 +22,7 @@ public:
 private:
        ifstream fileHandle;
        string distFile;
+       
 };
 
 /******************************************************/
index 263d57ba0e205d186b634d18a10bd4c72d4bbe8e..c4756c3260db6a660526ef09b66f668b25de12ef 100644 (file)
@@ -142,7 +142,10 @@ void ReadDistCommand::help(){
 
 ReadDistCommand::~ReadDistCommand(){
        if (abort == false) {
-               if (format != "matrix") { delete read; delete nameMap; }
+               if (format != "matrix") { 
+                       delete read; 
+                       delete nameMap; 
+               }
        }
 }
 
@@ -174,7 +177,7 @@ int ReadDistCommand::execute(){
                        if (globaldata->gSparseMatrix != NULL) { delete globaldata->gSparseMatrix;  }
                        globaldata->gSparseMatrix = read->getMatrix();
                        numDists = globaldata->gSparseMatrix->getNNodes();
+                       
       int lines = cutoff / (1.0/precision);
       vector<float> dist_cutoff(lines+1,0);
                        for (int i = 0; i <= lines;i++) {       
index 4b267b532ae1a19be791c380339b928212477edd..d5e917c204f1c2757cae52eb5dc8227929851ed1 100644 (file)
@@ -206,6 +206,14 @@ void ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
        goodNameOut.close();
        badNameOut.close();
        
+       //we were unable to remove some of the bad sequences
+       if (badSeqNames.size() != 0) {
+               for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
+                       mothurOut("Your namefile does not include the sequence " + *it + " please correct."); 
+                       mothurOutEndLine();
+               }
+       }
+
        if(groupfile != ""){
                
                ifstream inputGroups;
@@ -234,6 +242,14 @@ void ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
                inputGroups.close();
                goodGroupOut.close();
                badGroupOut.close();
+               
+               //we were unable to remove some of the bad sequences
+               if (badSeqGroups.size() != 0) {
+                       for (it = badSeqGroups.begin(); it != badSeqGroups.end(); it++) {  
+                               mothurOut("Your namefile does not include the sequence " + *it + " please correct."); 
+                               mothurOutEndLine();
+                       }
+               }
        }
 }
 
@@ -265,6 +281,15 @@ void ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
                }
                gobble(inputGroups);
        }
+       
+       //we were unable to remove some of the bad sequences
+       if (badSeqNames.size() != 0) {
+               for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
+                       mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); 
+                       mothurOutEndLine();
+               }
+       }
+       
        inputGroups.close();
        goodGroupOut.close();
        badGroupOut.close();
@@ -312,6 +337,15 @@ void ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
                }
                gobble(inputAlignReport);
        }
+       
+       //we were unable to remove some of the bad sequences
+       if (badSeqNames.size() != 0) {
+               for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
+                       mothurOut("Your file does not include the sequence " + *it + " please correct."); 
+                       mothurOutEndLine();
+               }
+       }
+
        inputAlignReport.close();
        goodAlignReportOut.close();
        badAlignReportOut.close();
index 62d8b3a3cf3f07a42a1d1f77c3ce21d57d526cdb..727a04b273fc3ee4b4c9c5882f55e746ad2c0d6a 100644 (file)
@@ -52,6 +52,7 @@ int SharedCommand::execute(){
                
                //lookup.clear();
                string errorOff = "no error";
+               //errorOff = "";
                        
                //read in listfile
                read = new ReadOTUFile(globaldata->inputFileName);      
@@ -83,8 +84,7 @@ int SharedCommand::execute(){
                
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> processedLabels;
-               set<string> userLabels = globaldata->labels;
-               
+               set<string> userLabels = globaldata->labels;    
                
                while((SharedList != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
                        
@@ -102,6 +102,8 @@ int SharedCommand::execute(){
                        }
                        
                        if ((anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
+                                       string saveLabel = SharedList->getLabel();
+                                       
                                        delete SharedList;
                                        SharedList = input->getSharedListVector(lastLabel); //get new list vector to process
                                        
@@ -113,6 +115,9 @@ int SharedCommand::execute(){
                                        
                                        processedLabels.insert(SharedList->getLabel());
                                        userLabels.erase(SharedList->getLabel());
+                                       
+                                       //restore real lastlabel to save below
+                                       SharedList->setLabel(saveLabel);
                        }
                        
                
@@ -166,6 +171,7 @@ void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
                
                //initialize bin values
                for (int i = 0; i < thislookup.size(); i++) {
+//cout << "in printData " << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() <<  endl;
                        out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
                        thislookup[i]->print(out);
                        
index 95734c74808a3addd63b86ba8151c1f3d7bf039d..8e9eb56c8e55bddd9405cc790759c45f2ed7dc12 100644 (file)
@@ -45,7 +45,7 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                                                
                                                float snpRateLeft = numSNPSLeft / (float) winSizeLeft;
                                                float snpRateRight = numSNPSRight / (float) winSizeRight;
-                                               float logR = log(snpRateLeft / snpRateRight) / log(2);
+                                               float logR = log(snpRateLeft / snpRateRight) / log(2.0);
                                                
                                                // do not accept excess snp ratio on either side of the break
                                                if (abs(logR) < 1 ) {  
index 8a873f74c2441c4dd50f20450ebe4882227ce32b..7477bd9365fa2f8c77319c0b6c0cb1f94ac85f24 100644 (file)
@@ -153,7 +153,7 @@ PCell* SparseMatrix::getSmallestCell(){
                                }
 
                        }
-//                     random_shuffle(mins.begin(), mins.end());  //randomize the order of the iterators in the mins vector
+                       random_shuffle(mins.begin(), mins.end());  //randomize the order of the iterators in the mins vector
 
                        for(int i=0;i<mins.size();i++){
                                mins[i]->vectorMap = &mins[i];  //assign vectorMap to the address for the container
index a2f79f9b0322670e009cf916c0ce8f05a61c5642..51fabf88828eb10617595b10cf8bf6a95396212f 100644 (file)
@@ -236,6 +236,8 @@ int SummaryCommand::execute(){
                        }
                        
                        if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = sabund->getLabel();
+                               
                                delete sabund;
                                sabund = input->getSAbundVector(lastLabel);
                                
@@ -250,6 +252,9 @@ int SummaryCommand::execute(){
                                        sumCalculators[i]->print(outputFileHandle);
                                }
                                outputFileHandle << endl;
+                               
+                               //restore real lastlabel to save below
+                               sabund->setLabel(saveLabel);
                        }               
 
                        lastLabel = sabund->getLabel();                 
index ef0485c61886e6f14ae3135f908bf8317855130f..575c0ded2d1a871289fecaefe96acb5fd1a36f70 100644 (file)
@@ -267,6 +267,8 @@ int SummarySharedCommand::execute(){
                        }
                        
                        if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                                       string saveLabel = lookup[0]->getLabel();
+                                       
                                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                                        lookup = input->getSharedRAbundVectors(lastLabel);
 
@@ -275,9 +277,10 @@ int SummarySharedCommand::execute(){
                                        
                                        processedLabels.insert(lookup[0]->getLabel());
                                        userLabels.erase(lookup[0]->getLabel());
+                                       
+                                       //restore real lastlabel to save below
+                                       lookup[0]->setLabel(saveLabel);
                        }
-
-               
                        
                        lastLabel = lookup[0]->getLabel();                      
                                
index e15d56ba82268e2b5d8d95b4f5bb562df4c70868..c115c54ce1bcb39786ec7c54fcd4ba11df3115ab 100644 (file)
@@ -443,6 +443,8 @@ void TreeGroupCommand::makeSimsShared() {
                        }
                        
                        if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = lookup[0]->getLabel();
+                       
                                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                                lookup = input->getSharedRAbundVectors(lastLabel);
 
@@ -451,6 +453,9 @@ void TreeGroupCommand::makeSimsShared() {
                                        
                                processedLabels.insert(lookup[0]->getLabel());
                                userLabels.erase(lookup[0]->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               lookup[0]->setLabel(saveLabel);
                        }
 
                        lastLabel = lookup[0]->getLabel();                      
index b6a53e67eaa8947ffd6d4b48b26e11fe23d7bdc4..e26dbcc5f6e926be9531dd7c719fcf79abfd13a3 100644 (file)
@@ -223,6 +223,8 @@ int VennCommand::execute(){
                                }
                                
                                if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                                       string saveLabel = lookup[0]->getLabel();
+                                       
                                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                                        lookup = input->getSharedRAbundVectors(lastLabel);
 
@@ -235,6 +237,9 @@ int VennCommand::execute(){
                                                for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor
                                        }                               
                                        venn->getPic(lookup, vennCalculators);
+                                       
+                                       //restore real lastlabel to save below
+                                       lookup[0]->setLabel(saveLabel);
                                }
                                
                                
@@ -293,6 +298,8 @@ int VennCommand::execute(){
                                }
                                
                                if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                                       string saveLabel = sabund->getLabel();
+                               
                                        delete sabund;
                                        sabund = input->getSAbundVector(lastLabel);
                                        
@@ -301,6 +308,9 @@ int VennCommand::execute(){
                                        
                                        processedLabels.insert(sabund->getLabel());
                                        userLabels.erase(sabund->getLabel());
+                                       
+                                       //restore real lastlabel to save below
+                                       sabund->setLabel(saveLabel);
                                }               
                                
                                lastLabel = sabund->getLabel();