added sparseDistanceMatrix class. Modified cluster commands to use the new sparse...

[mothur.git] / clustersplitcommand.cpp
diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp

index dc913d8c8895522a39b5ee0bd22be15a82eb7a74..b097f382024d5be9c56bd346723e398b10ef3725 100644 (file)
--- a/clustersplitcommand.cpp
+++ b/clustersplitcommand.cpp
@@ -8,12 +8,6 @@
   */
  
  #include "clustersplitcommand.h"
-#include "readcluster.h"
-#include "splitmatrix.h"
-#include "readphylip.h"
-#include "readcolumn.h"
-#include "readmatrix.hpp"
-#include "inputdata.h"
  
  
  //**********************************************************************************************************************
@@ -24,16 +18,17 @@ vector<string> ClusterSplitCommand::setParameters(){
                 CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName",false,false); parameters.push_back(pfasta);
                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName-FastaTaxName",false,false); parameters.push_back(pname);
                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName",false,false); parameters.push_back(pcolumn);
-               CommandParameter ptaxlevel("taxlevel", "Number", "", "1", "", "", "",false,false); parameters.push_back(ptaxlevel);
+               CommandParameter ptaxlevel("taxlevel", "Number", "", "3", "", "", "",false,false); parameters.push_back(ptaxlevel);
                 CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "",false,false); parameters.push_back(psplitmethod);
                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
                 CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund);
                 CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming);
                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
+               CommandParameter pcutoff("cutoff", "Number", "", "0.25", "", "", "",false,false); parameters.push_back(pcutoff);
                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
                 CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
+        CommandParameter pclassic("classic", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pclassic);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                         
@@ -60,13 +55,14 @@ string ClusterSplitCommand::getHelpString(){
                 helpString += "The phylip and column parameter allow you to enter your distance file. \n";
                 helpString += "The fasta parameter allows you to enter your aligned fasta file. \n";
                 helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n";
-               helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n";
+               helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 0.25. \n";
                 helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n";
                 helpString += "The method allows you to specify what clustering algorythm you want to use, default=average, option furthest, nearest, or average. \n";
                 helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n";
                 helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
-               helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n";
+               helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=3, meaning use the first taxon in each list. \n";
                 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
+        helpString += "The classic parameter allows you to indicate that you want to run your files with cluster.classic.  It is only valid with splitmethod=fasta. Default=f.\n";
  #ifdef USE_MPI
                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
  #endif
@@ -81,6 +77,29 @@ string ClusterSplitCommand::getHelpString(){
         }
  }
  //**********************************************************************************************************************
+string ClusterSplitCommand::getOutputFileNameTag(string type, string inputName=""){    
+       try {
+        string outputFileName = "";
+               map<string, vector<string> >::iterator it;
+        
+        //is this a type this command creates
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+        else {
+            if (type == "list") {  outputFileName =  "list"; }
+            else if (type == "rabund") {  outputFileName =  "rabund"; }
+            else if (type == "sabund") {  outputFileName =  "sabund"; }
+            else if (type == "column") {  outputFileName =  "dist"; }
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
+        }
+        return outputFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "getOutputFileNameTag");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
  ClusterSplitCommand::ClusterSplitCommand(){    
         try {
                 abort = true; calledHelp = true; 
@@ -201,9 +220,9 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                         else { distfile = fastafile;  splitmethod = "fasta";  m->setFastaFile(fastafile); }
                         
                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
-                       if (taxFile == "not open") { abort = true; }    
+                       if (taxFile == "not open") { taxFile = ""; abort = true; }      
                         else if (taxFile == "not found") { taxFile = ""; }
-                       else {  m->setTaxonomyFile(taxFile); }
+                       else {  m->setTaxonomyFile(taxFile); if (splitmethod != "fasta") { splitmethod = "classify"; } }
                         
                         if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) { 
                                 //is there are current file available for either of these?
@@ -264,34 +283,39 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                         if (temp == "not found") { temp = "100"; }
                         //saves precision legnth for formatting below
                         length = temp.length();
-                       convert(temp, precision); 
+                       m->mothurConvert(temp, precision); 
                         
                         temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
                         hard = m->isTrue(temp);
                         
                         temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
                         large = m->isTrue(temp);
-                       
+            
                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                         m->setProcessors(temp);
-                       convert(temp, processors);
+                       m->mothurConvert(temp, processors);
                         
                         temp = validParameter.validFile(parameters, "splitmethod", false);      
-                       if (splitmethod != "fasta") {
+                       if ((splitmethod != "fasta") && (splitmethod != "classify")) {
                                 if (temp == "not found")  { splitmethod = "distance"; }
                                 else {  splitmethod = temp; }
                         }
                         
-                       temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "10"; }
-                       convert(temp, cutoff); 
+            temp = validParameter.validFile(parameters, "classic", false);                     if (temp == "not found") { temp = "F"; }
+                       classic = m->isTrue(temp);
+            
+            if ((splitmethod != "fasta") && classic) { m->mothurOut("splitmethod must be fasta to use cluster.classic.\n"); abort=true; }
+
+                       temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "0.25"; }
+                       m->mothurConvert(temp, cutoff); 
                         cutoff += (5 / (precision * 10.0));  
                         
-                       temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "1"; }
-                       convert(temp, taxLevelCutoff); 
+                       temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "3"; }
+                       m->mothurConvert(temp, taxLevelCutoff); 
                         
                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "average"; }
                         
-                       if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
+                       if ((method == "furthest") || (method == "nearest") || (method == "average")) { m->mothurOut("Using splitmethod " + splitmethod + ".\n"); }
                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
                         
                         if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
@@ -379,7 +403,7 @@ int ClusterSplitCommand::execute(){
                 SplitMatrix* split;
                 if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
                 else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
-               else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
+               else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, classic, outputDir);     }
                 else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
                 
                 split->split();
@@ -555,74 +579,16 @@ int ClusterSplitCommand::execute(){
                 MPI_Barrier(MPI_COMM_WORLD);
                 
         #else
-               
+               ///////////////////// WINDOWS CAN ONLY USE 1 PROCESSORS ACCESS VIOLATION UNRESOLVED ///////////////////////
                 //sanity check
                 if (processors > distName.size()) { processors = distName.size(); }
                 
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                 if(processors == 1){
                                         listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
                                 }else{
-                                       
-                                       cout << processors << '\t' << distName.size() << endl;
-                                       vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
-                                       dividedNames.resize(processors);
-                                       
-                                       //for each file group figure out which process will complete it
-                                       //want to divide the load intelligently so the big files are spread between processes
-                                       for (int i = 0; i < distName.size(); i++) { 
-                                               cout << i << endl;
-                                               int processToAssign = (i+1) % processors; 
-                                               if (processToAssign == 0) { processToAssign = processors; }
-                                               
-                                               dividedNames[(processToAssign-1)].push_back(distName[i]);
-                                       }
-                                       
-                                       //not lets reverse the order of ever other process, so we balance big files running with little ones
-                                       for (int i = 0; i < processors; i++) {
-                                               cout << i << endl;
-                                               int remainder = ((i+1) % processors);
-                                               if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
-                                       }
-                                       
-                                       createProcesses(dividedNames);
-                                                       
-                                       if (m->control_pressed) { return 0; }
-
-                                       //get list of list file names from each process
-                                       for(int i=0;i<processors;i++){
-                                               string filename = toString(processIDS[i]) + ".temp";
-                                               ifstream in;
-                                               m->openInputFile(filename, in);
-                                               
-                                               in >> tag; m->gobble(in);
-                                               
-                                               while(!in.eof()) {
-                                                       string tempName;
-                                                       in >> tempName; m->gobble(in);
-                                                       listFileNames.push_back(tempName);
-                                               }
-                                               in.close();
-                                               m->mothurRemove((toString(processIDS[i]) + ".temp"));
-                                               
-                                               //get labels
-                                               filename = toString(processIDS[i]) + ".temp.labels";
-                                               ifstream in2;
-                                               m->openInputFile(filename, in2);
-                                               
-                                               float tempCutoff;
-                                               in2 >> tempCutoff; m->gobble(in2);
-                                               if (tempCutoff < cutoff) { cutoff = tempCutoff; }
-                                               
-                                               while(!in2.eof()) {
-                                                       string tempName;
-                                                       in2 >> tempName; m->gobble(in2);
-                                                       if (labels.count(tempName) == 0) { labels.insert(tempName); }
-                                               }
-                                               in2.close();
-                                               m->mothurRemove((toString(processIDS[i]) + ".temp.labels"));
-                                       }
-                               }
+                                       listFileNames = createProcesses(distName, labels);
+                }
                 #else
                                 listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
                 #endif
@@ -807,14 +773,17 @@ int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> us
                 if (outputDir == "") { outputDir += m->hasPath(distfile); }
                 fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
                 
-               m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
-               m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
-               m->openOutputFile(fileroot+ tag + ".list",              outList);
-                               
-               outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
-               outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
-               outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
+        string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
+        string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
+        string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
+        
+               m->openOutputFile(sabundFileName,       outSabund);
+               m->openOutputFile(rabundFileName,       outRabund);
+               m->openOutputFile(listFileName, outList);
                 
+               outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
+               outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
+               outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);               
                 map<float, int>::iterator itLabel;
  
                 //for each label needed
@@ -904,12 +873,35 @@ void ClusterSplitCommand::printData(ListVector* oldList){
         }
  }
  //**********************************************************************************************************************
-int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> > > dividedNames){
+vector<string>  ClusterSplitCommand::createProcesses(vector< map<string, string> > distName, set<string>& labels){
         try {
+        
+        vector<string> listFiles;
+        vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
+        dividedNames.resize(processors);
+        
+        //for each file group figure out which process will complete it
+        //want to divide the load intelligently so the big files are spread between processes
+        for (int i = 0; i < distName.size(); i++) { 
+            //cout << i << endl;
+            int processToAssign = (i+1) % processors; 
+            if (processToAssign == 0) { processToAssign = processors; }
+            
+            dividedNames[(processToAssign-1)].push_back(distName[i]);
+            if ((processToAssign-1) == 1) { m->mothurOut(distName[i].begin()->first + "\n"); }
+        }
+        
+        //not lets reverse the order of ever other process, so we balance big files running with little ones
+        for (int i = 0; i < processors; i++) {
+            //cout << i << endl;
+            int remainder = ((i+1) % processors);
+            if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
+        }
+        
+        if (m->control_pressed) { return listFiles; }
         
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               int exitCommand = 1;
+       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+               int process = 1;
                 processIDS.clear();
                 
                 //loop through and create all the processes you want
@@ -950,14 +942,99 @@ int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> >
                         }
                 }
                 
+        //do your part
+        listFiles = cluster(dividedNames[0], labels);
+        
                 //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
+               for (int i=0;i< processIDS.size();i++) { 
                         int temp = processIDS[i];
                         wait(&temp);
                 }
+        
+        //get list of list file names from each process
+        for(int i=0;i<processIDS.size();i++){
+            string filename = toString(processIDS[i]) + ".temp";
+            ifstream in;
+            m->openInputFile(filename, in);
+            
+            in >> tag; m->gobble(in);
+            
+            while(!in.eof()) {
+                string tempName;
+                in >> tempName; m->gobble(in);
+                listFiles.push_back(tempName);
+            }
+            in.close();
+            m->mothurRemove((toString(processIDS[i]) + ".temp"));
+            
+            //get labels
+            filename = toString(processIDS[i]) + ".temp.labels";
+            ifstream in2;
+            m->openInputFile(filename, in2);
+            
+            float tempCutoff;
+            in2 >> tempCutoff; m->gobble(in2);
+            if (tempCutoff < cutoff) { cutoff = tempCutoff; }
+            
+            while(!in2.eof()) {
+                string tempName;
+                in2 >> tempName; m->gobble(in2);
+                if (labels.count(tempName) == 0) { labels.insert(tempName); }
+            }
+            in2.close();
+            m->mothurRemove((toString(processIDS[i]) + ".temp.labels"));
+        }
+        
+
+    #else
+       
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the clusterData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //Taking advantage of shared memory to allow both threads to add labels.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<clusterData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
                 
-               return exitCommand;
+               //Create processor worker threads.
+               for( int i=1; i<processors; i++ ){
+                       // Allocate memory for thread data.
+                       clusterData* tempCluster = new clusterData(dividedNames[i], m, cutoff, method, outputDir, hard, precision, length, i);
+                       pDataArray.push_back(tempCluster);
+                       processIDS.push_back(i);
+            
+                       //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i-1] = CreateThread(NULL, 0, MyClusterThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);  
+            
+               }
+        
+        //do your part
+        listFiles = cluster(dividedNames[0], labels);
+        
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+            //get tag
+            tag = pDataArray[i]->tag;
+            //get listfiles created
+            for(int j=0; j < pDataArray[i]->listFiles.size(); j++){ listFiles.push_back(pDataArray[i]->listFiles[j]); }
+            //get labels
+            set<string>::iterator it;
+            for(it = pDataArray[i]->labels.begin(); it != pDataArray[i]->labels.end(); it++){ labels.insert(*it); }
+                       //check cutoff
+            if (pDataArray[i]->cutoff < cutoff) { cutoff = pDataArray[i]->cutoff; }
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+
         #endif          
+        
+        return listFiles;
         
         }
         catch(exception& e) {
@@ -969,153 +1046,27 @@ int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> >
  
  vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNames, set<string>& labels){
         try {
-               Cluster* cluster;
-               SparseMatrix* matrix;
-               ListVector* list;
-               ListVector oldList;
-               RAbundVector* rabund;
                 
                 vector<string> listFileNames;
-               
                 double smallestCutoff = cutoff;
                 
                 //cluster each distance file
                 for (int i = 0; i < distNames.size(); i++) {
-                       if (m->control_pressed) { return listFileNames; }
-                       
+            
                         string thisNamefile = distNames[i].begin()->second;
                         string thisDistFile = distNames[i].begin()->first;
-                                               
-                       #ifdef USE_MPI
-                               int pid;
-                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-                               
-                               //output your files too
-                               if (pid != 0) {
-                                       cout << endl << "Reading " << thisDistFile << endl;
-                               }
-                       #endif
-                       
-                       m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
-                       
-                       ReadMatrix* read = new ReadColumnMatrix(thisDistFile);  
-                       read->setCutoff(cutoff);
-
-                       NameAssignment* nameMap = new NameAssignment(thisNamefile);
-                       nameMap->readMap();
-                       read->read(nameMap);
-                       
-                       if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
-                       
-                       list = read->getListVector();
-                       oldList = *list;
-                       matrix = read->getMatrix();
-                       
-                       delete read; 
-                       delete nameMap; 
                         
-                       
-                       #ifdef USE_MPI
-                               //output your files too
-                               if (pid != 0) {
-                                       cout << endl << "Clustering " << thisDistFile << endl;
-                               }
-                       #endif
-                       
-                       m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
-               
-                       rabund = new RAbundVector(list->getRAbundVector());
-                       
-                       //create cluster
-                       if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
-                       else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
-                       else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
-                       tag = cluster->getTag();
-               
-                       if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
-                       fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
-                       
-                       ofstream listFile;
-                       m->openOutputFile(fileroot+ tag + ".list",      listFile);
-               
-                       listFileNames.push_back(fileroot+ tag + ".list");
-                               
-                       float previousDist = 0.00000;
-                       float rndPreviousDist = 0.00000;
-                       
-                       oldList = *list;
-
-                       print_start = true;
-                       start = time(NULL);
-                       double saveCutoff = cutoff;
-               
-                       while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
-               
-                               if (m->control_pressed) { //clean up
-                                       delete matrix; delete list;     delete cluster; delete rabund;
-                                       listFile.close();
-                                       for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
-                                       listFileNames.clear(); return listFileNames;
-                               }
-               
-                               cluster->update(saveCutoff);
-       
-                               float dist = matrix->getSmallDist();
-                               float rndDist;
-                               if (hard) {
-                                       rndDist = m->ceilDist(dist, precision); 
-                               }else{
-                                       rndDist = m->roundDist(dist, precision); 
-                               }
-
-                               if(previousDist <= 0.0000 && dist != previousDist){
-                                       oldList.setLabel("unique");
-                                       oldList.print(listFile);
-                                       if (labels.count("unique") == 0) {  labels.insert("unique");  }
-                               }
-                               else if(rndDist != rndPreviousDist){
-                                       oldList.setLabel(toString(rndPreviousDist,  length-1));
-                                       oldList.print(listFile);
-                                       if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
-                               }
-               
-                               previousDist = dist;
-                               rndPreviousDist = rndDist;
-                               oldList = *list;
-                       }
+                       string listFileName = "";
+            if (classic)    {  listFileName = clusterClassicFile(thisDistFile, thisNamefile, labels, smallestCutoff);   }
+            else            {  listFileName = clusterFile(thisDistFile, thisNamefile, labels, smallestCutoff);          }
  
-               
-                       if(previousDist <= 0.0000){
-                               oldList.setLabel("unique");
-                               oldList.print(listFile);
-                               if (labels.count("unique") == 0) { labels.insert("unique"); }
-                       }
-                       else if(rndPreviousDist<cutoff){
-                               oldList.setLabel(toString(rndPreviousDist,  length-1));
-                               oldList.print(listFile);
-                               if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
-                       }
-       
-                       delete matrix; delete list;     delete cluster; delete rabund; 
-                       listFile.close();
-                       
                         if (m->control_pressed) { //clean up
                                 for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
                                 listFileNames.clear(); return listFileNames;
                         }
-                       
-                       m->mothurRemove(thisDistFile);
-                       m->mothurRemove(thisNamefile);
-                       
-                       if (saveCutoff != cutoff) { 
-                               if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
-                               else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
-                       
-                               m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();  
-                       }
-                       
-                       if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
-               }
+            
+            listFileNames.push_back(listFileName);
+        }
                 
                 cutoff = smallestCutoff;
                                         
@@ -1130,6 +1081,269 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
  
  }
  //**********************************************************************************************************************
+string ClusterSplitCommand::clusterClassicFile(string thisDistFile, string thisNamefile, set<string>& labels, double& smallestCutoff){
+       try {
+        string listFileName = "";
+        
+        ListVector* list = NULL;
+        ListVector oldList;
+        RAbundVector* rabund = NULL;
+        
+#ifdef USE_MPI
+        int pid;
+        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+        
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Reading " << thisDistFile << endl;
+        }
+#endif
+
+        m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
+        
+        NameAssignment* nameMap = new NameAssignment(thisNamefile);
+        nameMap->readMap();
+                               
+               //reads phylip file storing data in 2D vector, also fills list and rabund
+        bool sim = false;
+               ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim);
+               cluster->readPhylipFile(thisDistFile, nameMap);
+               tag = cluster->getTag();
+        
+               if (m->control_pressed) { delete cluster; return 0; }
+               
+               list = cluster->getListVector();
+               rabund = cluster->getRAbundVector();
+        
+               if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
+               fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
+        listFileName = fileroot+ tag + ".list";
+        
+        ofstream listFile;
+               m->openOutputFile(fileroot+ tag + ".list",      listFile);
+               
+               float previousDist = 0.00000;
+               float rndPreviousDist = 0.00000;
+               oldList = *list;
+               
+#ifdef USE_MPI
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Clustering " << thisDistFile << endl;
+        }
+#endif
+
+        m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
+        
+               while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 1)){
+                       if (m->control_pressed) { delete cluster; delete list; delete rabund; listFile.close();  return listFileName;  }
+            
+                       cluster->update(cutoff);
+            
+                       float dist = cluster->getSmallDist();
+                       float rndDist;
+                       if (hard) {
+                               rndDist = m->ceilDist(dist, precision); 
+                       }else{
+                               rndDist = m->roundDist(dist, precision); 
+                       }
+            
+            if(previousDist <= 0.0000 && dist != previousDist){
+                oldList.setLabel("unique");
+                oldList.print(listFile);
+                if (labels.count("unique") == 0) {  labels.insert("unique");  }
+            }
+            else if(rndDist != rndPreviousDist){
+                oldList.setLabel(toString(rndPreviousDist,  length-1));
+                oldList.print(listFile);
+                if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+            }
+
+            
+                       previousDist = dist;
+                       rndPreviousDist = rndDist;
+                       oldList = *list;
+               }
+        
+               if(previousDist <= 0.0000){
+            oldList.setLabel("unique");
+            oldList.print(listFile);
+            if (labels.count("unique") == 0) { labels.insert("unique"); }
+        }
+        else if(rndPreviousDist<cutoff){
+            oldList.setLabel(toString(rndPreviousDist,  length-1));
+            oldList.print(listFile);
+            if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+        }
+
+        
+               listFile.close();
+               
+               delete cluster; delete nameMap; delete list; delete rabund;
+
+        
+        return listFileName;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "clusterClassicFile");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+string ClusterSplitCommand::clusterFile(string thisDistFile, string thisNamefile, set<string>& labels, double& smallestCutoff){
+       try {
+        string listFileName = "";
+        
+        Cluster* cluster = NULL;
+        SparseDistanceMatrix* matrix = NULL;
+        ListVector* list = NULL;
+        ListVector oldList;
+        RAbundVector* rabund = NULL;
+        
+        if (m->control_pressed) { return listFileName; }
+        
+#ifdef USE_MPI
+        int pid;
+        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+        
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Reading " << thisDistFile << endl;
+        }
+#endif
+        
+        m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
+        
+        ReadMatrix* read = new ReadColumnMatrix(thisDistFile);         
+        read->setCutoff(cutoff);
+        
+        NameAssignment* nameMap = new NameAssignment(thisNamefile);
+        nameMap->readMap();
+        read->read(nameMap);
+        
+        if (m->control_pressed) {  delete read; delete nameMap; return listFileName; }
+        
+        list = read->getListVector();
+        oldList = *list;
+        matrix = read->getDMatrix();
+        
+        delete read;  read = NULL;
+        delete nameMap; nameMap = NULL;
+        
+        
+#ifdef USE_MPI
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Clustering " << thisDistFile << endl;
+        }
+#endif
+        
+        m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
+               
+        rabund = new RAbundVector(list->getRAbundVector());
+        
+        //create cluster
+        if (method == "furthest")      {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
+        else if(method == "nearest"){  cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
+        else if(method == "average"){  cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
+        tag = cluster->getTag();
+               
+        if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
+        fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
+        
+        ofstream listFile;
+        m->openOutputFile(fileroot+ tag + ".list",     listFile);
+               
+        listFileName = fileroot+ tag + ".list";
+        
+        float previousDist = 0.00000;
+        float rndPreviousDist = 0.00000;
+        
+        oldList = *list;
+        
+        print_start = true;
+        start = time(NULL);
+        double saveCutoff = cutoff;
+               
+        while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
+            
+            if (m->control_pressed) { //clean up
+                delete matrix; delete list;    delete cluster; delete rabund;
+                listFile.close();
+                m->mothurRemove(listFileName);         
+                return listFileName;
+            }
+            
+            cluster->update(saveCutoff);
+            
+            float dist = matrix->getSmallDist();
+            float rndDist;
+            if (hard) {
+                rndDist = m->ceilDist(dist, precision); 
+            }else{
+                rndDist = m->roundDist(dist, precision); 
+            }
+            
+            if(previousDist <= 0.0000 && dist != previousDist){
+                oldList.setLabel("unique");
+                oldList.print(listFile);
+                if (labels.count("unique") == 0) {  labels.insert("unique");  }
+            }
+            else if(rndDist != rndPreviousDist){
+                oldList.setLabel(toString(rndPreviousDist,  length-1));
+                oldList.print(listFile);
+                if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+            }
+            
+            previousDist = dist;
+            rndPreviousDist = rndDist;
+            oldList = *list;
+        }
+        
+               
+        if(previousDist <= 0.0000){
+            oldList.setLabel("unique");
+            oldList.print(listFile);
+            if (labels.count("unique") == 0) { labels.insert("unique"); }
+        }
+        else if(rndPreviousDist<cutoff){
+            oldList.setLabel(toString(rndPreviousDist,  length-1));
+            oldList.print(listFile);
+            if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+        }
+        
+        delete matrix; delete list;    delete cluster; delete rabund; 
+        matrix = NULL; list = NULL; cluster = NULL; rabund = NULL;
+        listFile.close();
+        
+        if (m->control_pressed) { //clean up
+            m->mothurRemove(listFileName);     
+            return listFileName;
+        }
+        
+        m->mothurRemove(thisDistFile);
+        m->mothurRemove(thisNamefile);
+        
+        if (saveCutoff != cutoff) { 
+            if (hard)  {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
+            else               {       saveCutoff = m->roundDist(saveCutoff, precision);  }
+                       
+            m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();  
+        }
+        
+        if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
+        
+        return listFileName;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "clusterFile");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
  
  int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
         try{
@@ -1143,7 +1357,7 @@ int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> >
                 
                 string thisOutputDir = outputDir;
                 if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("column");
                 m->mothurRemove(outputFileName);