]> git.donarmstrong.com Git - mothur.git/commitdiff
added classic parameter to cluster.split. working on make.contigs command. fixed...
authorSarah Westcott <mothur.westcott@gmail.com>
Wed, 16 May 2012 14:04:50 +0000 (10:04 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Wed, 16 May 2012 14:04:50 +0000 (10:04 -0400)
alignment.cpp
clusterclassic.cpp
clusterclassic.h
clustersplitcommand.cpp
clustersplitcommand.h
makecontigscommand.cpp
makecontigscommand.h
splitmatrix.cpp
splitmatrix.h

index 235216e8e59c82be596004559445c021312acb40..03bf9bab62d34b657f1b4cb0dc076171e6d8d9d6 100644 (file)
@@ -15,7 +15,7 @@
 
 /**************************************************************************************************/
 
-Alignment::Alignment() {       /*      do nothing      */      }
+Alignment::Alignment() {       m = MothurOut::getInstance(); /*        stores the MothurOut instance in m; nothing else to set up      */      }
 
 /**************************************************************************************************/
 
index 1ce81c4f2affda469164709f862bb36ff023c163..f0bae59ab33de8c2f451b80c7180a33c49c28a54 100644 (file)
@@ -19,6 +19,10 @@ ClusterClassic::ClusterClassic(float c, string f, bool s) : method(f), smallDist
                cutoff = c;
                aboveCutoff = cutoff + 10000.0;
                m = MothurOut::getInstance();
+        if(method == "furthest")        {   tag = "fn";   }
+        else if (method == "average")   {   tag = "an";   }
+        else if (method == "weighted")  {   tag = "wn";   }        
+        else if (method == "nearest")   {   tag = "nn";   }
        }
        catch(exception& e) {
                m->errorOut(e, "ClusterClassic", "ClusterClassic");
index 110c0408e5107e9acebd322c7a1070f364c13a7d..932c806ec42ac52a4a90670239d0c4a6a27b1efb 100644 (file)
@@ -1,8 +1,7 @@
-#ifndef CLUSTER_H
-#define CLUSTER_H
+#ifndef CLUSTERCLASSIC_H
+#define CLUSTERCLASSIC_H
 
 
-#include "mothur.h"
 #include "mothurout.h"
 #include "listvector.hpp"
 #include "rabundvector.hpp"
@@ -28,7 +27,7 @@ public:
        int getNSeqs() { return nseqs; }        
        ListVector* getListVector() { return list; }
        RAbundVector* getRAbundVector() { return rabund; }              
-       string getTag();
+       string getTag() { return tag; } // tag is set by the constructor from method: "fn" (furthest), "an" (average), "wn" (weighted), "nn" (nearest)
        void setMapWanted(bool m);  
        map<string, int> getSeqtoBin()  {  return seq2Bin;      }
 
@@ -57,7 +56,7 @@ private:
        bool mapWanted, sim;
        double cutoff, aboveCutoff;
        map<string, int> seq2Bin;
-       string method;
+       string method, tag;
        
        MothurOut* m;
 };
index 9cdd7b065b857e5ddb7eaacbc27e4c3af84a43cb..2eee59f6165e28c50f86fd6ab9b1db0920135040 100644 (file)
@@ -10,7 +10,6 @@
 #include "clustersplitcommand.h"
 
 
-
 //**********************************************************************************************************************
 vector<string> ClusterSplitCommand::setParameters(){   
        try {
@@ -29,6 +28,7 @@ vector<string> ClusterSplitCommand::setParameters(){
                CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
                CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
                CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
+        CommandParameter pclassic("classic", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pclassic);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                        
@@ -62,6 +62,7 @@ string ClusterSplitCommand::getHelpString(){
                helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
                helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=3, meaning use the first taxon in each list. \n";
                helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
+        helpString += "The classic parameter allows you to indicate that you want to run your files with cluster.classic.  It is only valid with splitmethod=fasta. Default=f.\n";
 #ifdef USE_MPI
                helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
 #endif
@@ -266,7 +267,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
                        large = m->isTrue(temp);
-                       
+            
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
@@ -277,6 +278,11 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                                else {  splitmethod = temp; }
                        }
                        
+            temp = validParameter.validFile(parameters, "classic", false);                     if (temp == "not found") { temp = "F"; }
+                       classic = m->isTrue(temp);
+            
+            if ((splitmethod != "fasta") && classic) { m->mothurOut("splitmethod must be fasta to use cluster.classic.\n"); abort=true; }
+
                        temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "0.25"; }
                        m->mothurConvert(temp, cutoff); 
                        cutoff += (5 / (precision * 10.0));  
@@ -374,7 +380,7 @@ int ClusterSplitCommand::execute(){
                SplitMatrix* split;
                if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
                else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
-               else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
+               else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, classic, outputDir);     }
                else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
                
                split->split();
@@ -1021,148 +1027,20 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                //cluster each distance file
                for (int i = 0; i < distNames.size(); i++) {
             
-            Cluster* cluster = NULL;
-            SparseMatrix* matrix = NULL;
-            ListVector* list = NULL;
-            ListVector oldList;
-            RAbundVector* rabund = NULL;
-            
-                       if (m->control_pressed) { return listFileNames; }
-                       
                        string thisNamefile = distNames[i].begin()->second;
                        string thisDistFile = distNames[i].begin()->first;
-                                               
-                       #ifdef USE_MPI
-                               int pid;
-                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-                               
-                               //output your files too
-                               if (pid != 0) {
-                                       cout << endl << "Reading " << thisDistFile << endl;
-                               }
-                       #endif
-                       
-                       m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
-                       
-                       ReadMatrix* read = new ReadColumnMatrix(thisDistFile);  
-                       read->setCutoff(cutoff);
-
-                       NameAssignment* nameMap = new NameAssignment(thisNamefile);
-                       nameMap->readMap();
-                       read->read(nameMap);
-                       
-                       if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
-                       
-                       list = read->getListVector();
-                       oldList = *list;
-                       matrix = read->getMatrix();
-                       
-                       delete read;  read = NULL;
-                       delete nameMap; nameMap = NULL;
-                       
-                       
-                       #ifdef USE_MPI
-                               //output your files too
-                               if (pid != 0) {
-                                       cout << endl << "Clustering " << thisDistFile << endl;
-                               }
-                       #endif
                        
-                       m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
-               
-                       rabund = new RAbundVector(list->getRAbundVector());
-                       
-                       //create cluster
-                       if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
-                       else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
-                       else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
-                       tag = cluster->getTag();
-               
-                       if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
-                       fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
-                       
-                       ofstream listFile;
-                       m->openOutputFile(fileroot+ tag + ".list",      listFile);
-               
-                       listFileNames.push_back(fileroot+ tag + ".list");
-                               
-                       float previousDist = 0.00000;
-                       float rndPreviousDist = 0.00000;
-                       
-                       oldList = *list;
-
-                       print_start = true;
-                       start = time(NULL);
-                       double saveCutoff = cutoff;
-               
-                       while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
-               
-                               if (m->control_pressed) { //clean up
-                                       delete matrix; delete list;     delete cluster; delete rabund;
-                                       listFile.close();
-                                       for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
-                                       listFileNames.clear(); return listFileNames;
-                               }
-               
-                               cluster->update(saveCutoff);
-       
-                               float dist = matrix->getSmallDist();
-                               float rndDist;
-                               if (hard) {
-                                       rndDist = m->ceilDist(dist, precision); 
-                               }else{
-                                       rndDist = m->roundDist(dist, precision); 
-                               }
-
-                               if(previousDist <= 0.0000 && dist != previousDist){
-                                       oldList.setLabel("unique");
-                                       oldList.print(listFile);
-                                       if (labels.count("unique") == 0) {  labels.insert("unique");  }
-                               }
-                               else if(rndDist != rndPreviousDist){
-                                       oldList.setLabel(toString(rndPreviousDist,  length-1));
-                                       oldList.print(listFile);
-                                       if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
-                               }
-               
-                               previousDist = dist;
-                               rndPreviousDist = rndDist;
-                               oldList = *list;
-                       }
+                       string listFileName = "";
+            if (classic)    {  listFileName = clusterClassicFile(thisDistFile, thisNamefile, labels, smallestCutoff);   }
+            else            {  listFileName = clusterFile(thisDistFile, thisNamefile, labels, smallestCutoff);          }
 
-               
-                       if(previousDist <= 0.0000){
-                               oldList.setLabel("unique");
-                               oldList.print(listFile);
-                               if (labels.count("unique") == 0) { labels.insert("unique"); }
-                       }
-                       else if(rndPreviousDist<cutoff){
-                               oldList.setLabel(toString(rndPreviousDist,  length-1));
-                               oldList.print(listFile);
-                               if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
-                       }
-       
-                       delete matrix; delete list;     delete cluster; delete rabund; 
-            matrix = NULL; list = NULL; cluster = NULL; rabund = NULL;
-                       listFile.close();
-                       
                        if (m->control_pressed) { //clean up
                                for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
                                listFileNames.clear(); return listFileNames;
                        }
-                       
-                       m->mothurRemove(thisDistFile);
-                       m->mothurRemove(thisNamefile);
-                       
-                       if (saveCutoff != cutoff) { 
-                               if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
-                               else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
-                       
-                               m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();  
-                       }
-                       
-                       if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
-               }
+            
+            listFileNames.push_back(listFileName);
+        }
                
                cutoff = smallestCutoff;
                                        
@@ -1177,6 +1055,269 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
 
 }
 //**********************************************************************************************************************
+string ClusterSplitCommand::clusterClassicFile(string thisDistFile, string thisNamefile, set<string>& labels, double& smallestCutoff){
+       try {
+        string listFileName = "";
+        
+        ListVector* list = NULL;
+        ListVector oldList;
+        RAbundVector* rabund = NULL;
+        
+#ifdef USE_MPI
+        int pid;
+        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+        
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Reading " << thisDistFile << endl;
+        }
+#endif
+
+        m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
+        
+        NameAssignment* nameMap = new NameAssignment(thisNamefile);
+        nameMap->readMap();
+                               
+               //reads phylip file storing data in 2D vector, also fills list and rabund
+        bool sim = false;
+               ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim);
+               cluster->readPhylipFile(thisDistFile, nameMap);
+               tag = cluster->getTag();
+        
+               if (m->control_pressed) { delete cluster; return 0; }
+               
+               list = cluster->getListVector();
+               rabund = cluster->getRAbundVector();
+        
+               if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
+               fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
+        listFileName = fileroot+ tag + ".list";
+        
+        ofstream listFile;
+               m->openOutputFile(fileroot+ tag + ".list",      listFile);
+               
+               float previousDist = 0.00000;
+               float rndPreviousDist = 0.00000;
+               oldList = *list;
+               
+#ifdef USE_MPI
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Clustering " << thisDistFile << endl;
+        }
+#endif
+
+        m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
+        
+               while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 1)){
+                       if (m->control_pressed) { delete cluster; delete list; delete rabund; listFile.close();  return listFileName;  }
+            
+                       cluster->update(cutoff);
+            
+                       float dist = cluster->getSmallDist();
+                       float rndDist;
+                       if (hard) {
+                               rndDist = m->ceilDist(dist, precision); 
+                       }else{
+                               rndDist = m->roundDist(dist, precision); 
+                       }
+            
+            if(previousDist <= 0.0000 && dist != previousDist){
+                oldList.setLabel("unique");
+                oldList.print(listFile);
+                if (labels.count("unique") == 0) {  labels.insert("unique");  }
+            }
+            else if(rndDist != rndPreviousDist){
+                oldList.setLabel(toString(rndPreviousDist,  length-1));
+                oldList.print(listFile);
+                if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+            }
+
+            
+                       previousDist = dist;
+                       rndPreviousDist = rndDist;
+                       oldList = *list;
+               }
+        
+               if(previousDist <= 0.0000){
+            oldList.setLabel("unique");
+            oldList.print(listFile);
+            if (labels.count("unique") == 0) { labels.insert("unique"); }
+        }
+        else if(rndPreviousDist<cutoff){
+            oldList.setLabel(toString(rndPreviousDist,  length-1));
+            oldList.print(listFile);
+            if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+        }
+
+        
+               listFile.close();
+               
+               delete cluster; delete nameMap; delete list; delete rabund;
+
+        
+        return listFileName;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "clusterClassicFile");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+string ClusterSplitCommand::clusterFile(string thisDistFile, string thisNamefile, set<string>& labels, double& smallestCutoff){ /*
+ * Clusters one column-formatted distance file and writes the resulting list file.
+ *
+ * thisDistFile   - distance file read through ReadColumnMatrix (with cutoff applied via setCutoff).
+ * thisNamefile   - names file loaded into a NameAssignment and passed to the matrix reader.
+ * labels         - every label printed to the list file ("unique" or a rounded distance) is inserted here.
+ * smallestCutoff - lowered to this file's final saveCutoff when that value is smaller.
+ *
+ * Returns the list file name (fileroot + tag + ".list"). When m->control_pressed is set the function
+ * returns early: with an empty name before the output file exists, otherwise with the name of a list
+ * file that has already been removed. On normal completion thisDistFile and thisNamefile are removed.
+ * Side effects on members: tag, fileroot, print_start, start, and outputDir when it was empty.
+ */
+       try {
+        string listFileName = "";
+        
+        Cluster* cluster = NULL;
+        SparseMatrix* matrix = NULL;
+        ListVector* list = NULL;
+        ListVector oldList;
+        RAbundVector* rabund = NULL;
+        
+        if (m->control_pressed) { return listFileName; } // interrupted before any work: return the empty name
+        
+#ifdef USE_MPI
+        int pid;
+        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+        
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Reading " << thisDistFile << endl;
+        }
+#endif
+        
+        m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
+        
+        ReadMatrix* read = new ReadColumnMatrix(thisDistFile);         
+        read->setCutoff(cutoff);
+        
+        NameAssignment* nameMap = new NameAssignment(thisNamefile);
+        nameMap->readMap();
+        read->read(nameMap);
+        
+        if (m->control_pressed) {  delete read; delete nameMap; return listFileName; }
+        
+        list = read->getListVector();
+        oldList = *list;
+        matrix = read->getMatrix();
+        
+        delete read;  read = NULL; // list and matrix were fetched above; the reader is not used again
+        delete nameMap; nameMap = NULL;
+        
+        
+#ifdef USE_MPI
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Clustering " << thisDistFile << endl;
+        }
+#endif
+        
+        m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
+               
+        rabund = new RAbundVector(list->getRAbundVector());
+        
+        //create cluster
+        if (method == "furthest")      {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
+        else if(method == "nearest"){  cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
+        else if(method == "average"){  cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
+        tag = cluster->getTag(); // NOTE(review): cluster is still NULL here for any other method value - confirm method is validated before this call
+               
+        if (outputDir == "") { outputDir += m->hasPath(thisDistFile); } // empty outputDir: take it from m->hasPath(thisDistFile) (presumably the file's directory)
+        fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
+        
+        ofstream listFile;
+        m->openOutputFile(fileroot+ tag + ".list",     listFile);
+               
+        listFileName = fileroot+ tag + ".list";
+        
+        float previousDist = 0.00000;
+        float rndPreviousDist = 0.00000;
+        
+        oldList = *list;
+        
+        print_start = true;
+        start = time(NULL);
+        double saveCutoff = cutoff; // working copy handed to cluster->update(); compared against cutoff after clustering
+               
+        while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
+            
+            if (m->control_pressed) { //clean up
+                delete matrix; delete list;    delete cluster; delete rabund;
+                listFile.close();
+                m->mothurRemove(listFileName);         
+                return listFileName;
+            }
+            
+            cluster->update(saveCutoff);
+            
+            float dist = matrix->getSmallDist();
+            float rndDist;
+            if (hard) {
+                rndDist = m->ceilDist(dist, precision); 
+            }else{
+                rndDist = m->roundDist(dist, precision); 
+            }
+            
+            if(previousDist <= 0.0000 && dist != previousDist){ // previous distance still 0 and the smallest distance has moved: print the saved list as "unique"
+                oldList.setLabel("unique");
+                oldList.print(listFile);
+                if (labels.count("unique") == 0) {  labels.insert("unique");  }
+            }
+            else if(rndDist != rndPreviousDist){ // rounded distance changed: print the saved list under the previous rounded distance
+                oldList.setLabel(toString(rndPreviousDist,  length-1));
+                oldList.print(listFile);
+                if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+            }
+            
+            previousDist = dist;
+            rndPreviousDist = rndDist;
+            oldList = *list;
+        }
+        
+               
+        if(previousDist <= 0.0000){ // after the loop: print the last saved list
+            oldList.setLabel("unique");
+            oldList.print(listFile);
+            if (labels.count("unique") == 0) { labels.insert("unique"); }
+        }
+        else if(rndPreviousDist<cutoff){
+            oldList.setLabel(toString(rndPreviousDist,  length-1));
+            oldList.print(listFile);
+            if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+        }
+        
+        delete matrix; delete list;    delete cluster; delete rabund; 
+        matrix = NULL; list = NULL; cluster = NULL; rabund = NULL;
+        listFile.close();
+        
+        if (m->control_pressed) { //clean up
+            m->mothurRemove(listFileName);     
+            return listFileName;
+        }
+        
+        m->mothurRemove(thisDistFile); // the inputs for this piece are removed once clustering has finished
+        m->mothurRemove(thisNamefile);
+        
+        if (saveCutoff != cutoff) { // NOTE(review): only differs if cluster->update() changed it (presumably takes it by reference) - confirm
+            if (hard)  {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
+            else               {       saveCutoff = m->roundDist(saveCutoff, precision);  }
+                       
+            m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();  
+        }
+        
+        if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  } // report the smallest cutoff seen back to the caller
+        
+        return listFileName;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "clusterFile");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 
 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
        try{
index 0e32ffaea7710189859f458de4adc897362e12ef..a82d02270ecc37f1c85e93c9fe51b70e29288081 100644 (file)
@@ -23,6 +23,7 @@
 #include "readmatrix.hpp"
 #include "inputdata.h"
 #include "clustercommand.h"
+#include "clusterclassic.h"
 
 class ClusterSplitCommand : public Command {
        
@@ -48,13 +49,15 @@ private:
        string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, distfile, format, showabund, timing, splitmethod, taxFile, fastafile;
        double cutoff, splitcutoff;
        int precision, length, processors, taxLevelCutoff;
-       bool print_start, abort, hard, large;
+       bool print_start, abort, hard, large, classic;
        time_t start;
        ofstream outList, outRabund, outSabund;
        
        void printData(ListVector*);
        vector<string> createProcesses(vector< map<string, string> >, set<string>&);
        vector<string> cluster(vector< map<string, string> >, set<string>&);
+    string clusterFile(string, string, set<string>&, double&);
+    string clusterClassicFile(string, string, set<string>&, double&);
        int mergeLists(vector<string>, map<float, int>, ListVector*);
        map<float, int> completeListFile(vector<string>, string, set<string>&, ListVector*&);
        int createMergedDistanceFile(vector< map<string, string> >);
index 431fc42ef9149658a42ebab3e99e52b21265e313..697521c16abc44dbeee019590f66fb4d03032c64 100644 (file)
@@ -172,9 +172,13 @@ int MakeContigsCommand::execute(){
         //read ffastq and rfastq files creating fasta and qual files.
         //this function will create a forward and reverse, fasta and qual files for each processor.
         //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual
-        vector< vector<string> > files = readFastqFiles();  
+        int numReads = 0;
+        m->mothurOut("Reading fastq data..."); cout.flush();
+        vector< vector<string> > files = readFastqFiles(numReads);  
+        m->mothurOut("Done.\n");
         
         
+       
         
         string currentFasta = "";
                itTypes = outputTypes.find("fasta");
@@ -203,7 +207,7 @@ int MakeContigsCommand::execute(){
        }
 }
 //**********************************************************************************************************************
-vector< vector<string> > MakeContigsCommand::readFastqFiles(){
+vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
     try {
         vector< vector<string> > files;
         
@@ -250,7 +254,7 @@ vector< vector<string> > MakeContigsCommand::readFastqFiles(){
         ifstream inReverse;
         m->openInputFile(rfastqfile, inReverse);
         
-        int count = 0;
+        count = 0;
         while ((!inForward.eof()) && (!inReverse.eof())) {
             
             if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
@@ -275,7 +279,15 @@ vector< vector<string> > MakeContigsCommand::readFastqFiles(){
             *(tempfiles[process][3]) << endl;
             
             count++;
-        }
+            
+            //report progress
+                       if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+                       
+               }
+               //report progress
+               if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+               
+
         
         //close files, delete ofstreams
         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
index 5d123d55eace0254a6ae2f6a3a9f48ef589d692a..e63695e77f401b0411df0e9405c1ce99fbd44b98 100644 (file)
@@ -48,7 +48,7 @@ private:
     vector<string> outputNames;
     
     fastqRead readFastq(ifstream&);
-    vector< vector<string> > readFastqFiles();
+    vector< vector<string> > readFastqFiles(int&);
     bool checkReads(fastqRead&, fastqRead&);
 };
 
index dabcd0469461f25a79ac707cf0e785c19974cc1e..59e49a79e9e702cfcf70c47ffee3a42da8a0b537 100644 (file)
@@ -25,7 +25,7 @@ SplitMatrix::SplitMatrix(string distfile, string name, string tax, float c, stri
 }
 /***********************************************************************/
 
-SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float cu, string t, int p, string output){
+SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float cu, string t, int p, bool cl, string output){
        m = MothurOut::getInstance();
        fastafile = ffile;
        namefile = name;
@@ -34,6 +34,7 @@ SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float c
        distCutoff = cu; //for fasta method if you are creating distance matrix you need a cutoff for that
        method = t;
        processors = p;
+    classic = cl;
        outputDir = output;
 }
 
@@ -185,7 +186,9 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                //process each distance file
                for (int i = 0; i < numGroups; i++) { 
                        
-                       string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(distCutoff);
+                       string options = "";
+            if (classic) { options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", output=lt"; }
+            else { options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(distCutoff); }
                        if (outputDir != "") { options += ", outputdir=" + outputDir; }
                        
                        Command* command = new DistanceCommand(options);
@@ -229,7 +232,9 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                for(int i=0;i<numGroups;i++){
                        string tempNameFile = namefile + "." + toString(i) + ".temp";
                        if (outputDir == "") { outputDir = m->hasPath(fastafile); }
-                       string tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist";
+                       string tempDistFile = "";
+            if (classic) { tempDistFile =  outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "phylip.dist";}
+            else { tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist"; }
 
                        //if there are valid distances
                        ifstream fileHandle;
index 4ff316073789b694916a75fae5d2081a813d96c7..b8aa55119c810871988e76ca156b1746bd4eeb4a 100644 (file)
@@ -20,7 +20,7 @@ class SplitMatrix  {
        public:
 
                SplitMatrix(string, string, string, float, string, bool); //column formatted distance file, namesfile, cutoff, method, large
-               SplitMatrix(string, string, string, float, float, string, int, string); //fastafile, namefile, taxFile, taxcutoff, cutoff, method, processors, outputDir
+               SplitMatrix(string, string, string, float, float, string, int, bool, string); //fastafile, namefile, taxFile, taxcutoff, cutoff, method, processors, classic, outputDir
                
                ~SplitMatrix();
                int split();
@@ -33,7 +33,7 @@ class SplitMatrix  {
                string distFile, namefile, singleton, method, taxFile, fastafile, outputDir;
                vector< map< string, string> > dists;
                float cutoff, distCutoff;
-               bool large;
+               bool large, classic;
                int processors;
                                
                int splitDistance();