]> git.donarmstrong.com Git - mothur.git/commitdiff
Merge branch 'master' of https://github.com/mothur/mothur.git
authorKathryn Iverson <kd.iverson@gmail.com>
Wed, 16 May 2012 17:59:17 +0000 (13:59 -0400)
committerKathryn Iverson <kd.iverson@gmail.com>
Wed, 16 May 2012 17:59:17 +0000 (13:59 -0400)
17 files changed:
Mothur.xcodeproj/project.pbxproj
Mothur.xcodeproj/xcuserdata/SarahsWork.xcuserdatad/xcschemes/Mothur.xcscheme
alignment.cpp
classifyotucommand.cpp
clusterclassic.cpp
clusterclassic.h
clustersplitcommand.cpp
clustersplitcommand.h
commandfactory.cpp
listotulabelscommand.cpp [new file with mode: 0644]
listotulabelscommand.h [new file with mode: 0644]
makecontigscommand.cpp [new file with mode: 0644]
makecontigscommand.h [new file with mode: 0644]
makefastqcommand.cpp
parsefastaqcommand.cpp
splitmatrix.cpp
splitmatrix.h

index ae1df1e962214992fc05f97c4fde02420ea9d735..f40cb4b53045edaf45f23a3784a8d38d7b3c8eec 100644 (file)
@@ -37,6 +37,8 @@
                A79234D713C74BF6002B08E2 /* mothurfisher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79234D613C74BF6002B08E2 /* mothurfisher.cpp */; };
                A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A795840C13F13CD900F201D5 /* countgroupscommand.cpp */; };
                A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; };
+               A7A0671A1562946F0095C8C5 /* listotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */; };
+               A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */; };
                A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */; };
                A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */; };
                A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; };
                A795840C13F13CD900F201D5 /* countgroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = countgroupscommand.cpp; sourceTree = "<group>"; };
                A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makefastqcommand.h; sourceTree = "<group>"; };
                A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makefastqcommand.cpp; sourceTree = "<group>"; };
+               A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = listotulabelscommand.cpp; sourceTree = "<group>"; };
+               A7A0671C156294810095C8C5 /* listotulabelscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = listotulabelscommand.h; sourceTree = "<group>"; };
+               A7A0671D1562AC230095C8C5 /* makecontigscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makecontigscommand.h; sourceTree = "<group>"; };
+               A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makecontigscommand.cpp; sourceTree = "<group>"; };
                A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sortseqscommand.cpp; sourceTree = "<group>"; };
                A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sortseqscommand.h; sourceTree = "<group>"; };
                A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = otuassociationcommand.cpp; sourceTree = "<group>"; };
                                A7E9B73B12D37EC400DA6239 /* libshuffcommand.cpp */,
                                A7E9B73E12D37EC400DA6239 /* listseqscommand.h */,
                                A7E9B73D12D37EC400DA6239 /* listseqscommand.cpp */,
+                               A7A0671C156294810095C8C5 /* listotulabelscommand.h */,
+                               A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */,
                                A7FA10001302E096003860FE /* mantelcommand.h */,
                                A7FA10011302E096003860FE /* mantelcommand.cpp */,
                                A724D2B4153C8600000A826F /* makebiomcommand.h */,
                                A724D2B6153C8628000A826F /* makebiomcommand.cpp */,
+                               A7A0671D1562AC230095C8C5 /* makecontigscommand.h */,
+                               A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */,
                                A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */,
                                A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */,
                                A7E9B74412D37EC400DA6239 /* makegroupcommand.h */,
                                A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */,
                                219C1DE01552C4BD004209F9 /* newcommandtemplate.cpp in Sources */,
                                219C1DE41559BCCF004209F9 /* getcoremicrobiomecommand.cpp in Sources */,
+                               A7A0671A1562946F0095C8C5 /* listotulabelscommand.cpp in Sources */,
+                               A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index 5184c66900b60315219f5f452a689821e00f4bfd..3ced6e4a7c610f456b22a59d421b7ca434d8e6bb 100644 (file)
@@ -14,7 +14,7 @@
             <BuildableReference
                BuildableIdentifier = "primary"
                BlueprintIdentifier = "8DD76FA90486AB0100D96B5E"
-               BuildableName = "Mothur"
+               BuildableName = "mothur"
                BlueprintName = "Mothur"
                ReferencedContainer = "container:Mothur.xcodeproj">
             </BuildableReference>
@@ -32,7 +32,7 @@
          <BuildableReference
             BuildableIdentifier = "primary"
             BlueprintIdentifier = "8DD76FA90486AB0100D96B5E"
-            BuildableName = "Mothur"
+            BuildableName = "mothur"
             BlueprintName = "Mothur"
             ReferencedContainer = "container:Mothur.xcodeproj">
          </BuildableReference>
@@ -50,7 +50,7 @@
          <BuildableReference
             BuildableIdentifier = "primary"
             BlueprintIdentifier = "8DD76FA90486AB0100D96B5E"
-            BuildableName = "Mothur"
+            BuildableName = "mothur"
             BlueprintName = "Mothur"
             ReferencedContainer = "container:Mothur.xcodeproj">
          </BuildableReference>
@@ -68,7 +68,7 @@
          <BuildableReference
             BuildableIdentifier = "primary"
             BlueprintIdentifier = "8DD76FA90486AB0100D96B5E"
-            BuildableName = "Mothur"
+            BuildableName = "mothur"
             BlueprintName = "Mothur"
             ReferencedContainer = "container:Mothur.xcodeproj">
          </BuildableReference>
index 235216e8e59c82be596004559445c021312acb40..03bf9bab62d34b657f1b4cb0dc076171e6d8d9d6 100644 (file)
@@ -15,7 +15,7 @@
 
 /**************************************************************************************************/
 
-Alignment::Alignment() {       /*      do nothing      */      }
+Alignment::Alignment() {       m = MothurOut::getInstance(); /*        do nothing      */      }
 
 /**************************************************************************************************/
 
index c8896375b016ef6c45462351a18e8e520bfe75c2..ec22de8b35762adf92e2a3b750198dcbb91f0396 100644 (file)
@@ -475,6 +475,9 @@ vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th
                                }
                                
                        }
+            
+            //phylotree adds an extra unknown so we want to remove that
+            if (bestChild.name == "unknown") { bestChildSize--; }
                                
                        //is this taxonomy above cutoff
                        int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
index 1ce81c4f2affda469164709f862bb36ff023c163..f0bae59ab33de8c2f451b80c7180a33c49c28a54 100644 (file)
@@ -19,6 +19,10 @@ ClusterClassic::ClusterClassic(float c, string f, bool s) : method(f), smallDist
                cutoff = c;
                aboveCutoff = cutoff + 10000.0;
                m = MothurOut::getInstance();
+        if(method == "furthest")        {   tag = "fn";   }
+        else if (method == "average")   {   tag = "an";   }
+        else if (method == "weighted")  {   tag = "wn";   }        
+        else if (method == "nearest")   {   tag = "nn";   }
        }
        catch(exception& e) {
                m->errorOut(e, "ClusterClassic", "ClusterClassic");
index 110c0408e5107e9acebd322c7a1070f364c13a7d..932c806ec42ac52a4a90670239d0c4a6a27b1efb 100644 (file)
@@ -1,8 +1,7 @@
-#ifndef CLUSTER_H
-#define CLUSTER_H
+#ifndef CLUSTERCLASSIC_H
+#define CLUSTERCLASSIC_H
 
 
-#include "mothur.h"
 #include "mothurout.h"
 #include "listvector.hpp"
 #include "rabundvector.hpp"
@@ -28,7 +27,7 @@ public:
        int getNSeqs() { return nseqs; }        
        ListVector* getListVector() { return list; }
        RAbundVector* getRAbundVector() { return rabund; }              
-       string getTag();
+       string getTag() { return tag; }
        void setMapWanted(bool m);  
        map<string, int> getSeqtoBin()  {  return seq2Bin;      }
 
@@ -57,7 +56,7 @@ private:
        bool mapWanted, sim;
        double cutoff, aboveCutoff;
        map<string, int> seq2Bin;
-       string method;
+       string method, tag;
        
        MothurOut* m;
 };
index 9cdd7b065b857e5ddb7eaacbc27e4c3af84a43cb..2eee59f6165e28c50f86fd6ab9b1db0920135040 100644 (file)
@@ -10,7 +10,6 @@
 #include "clustersplitcommand.h"
 
 
-
 //**********************************************************************************************************************
 vector<string> ClusterSplitCommand::setParameters(){   
        try {
@@ -29,6 +28,7 @@ vector<string> ClusterSplitCommand::setParameters(){
                CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
                CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
                CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
+        CommandParameter pclassic("classic", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pclassic);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                        
@@ -62,6 +62,7 @@ string ClusterSplitCommand::getHelpString(){
                helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
                helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=3, meaning use the first taxon in each list. \n";
                helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
+        helpString += "The classic parameter allows you to indicate that you want to run your files with cluster.classic.  It is only valid with splitmethod=fasta. Default=f.\n";
 #ifdef USE_MPI
                helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
 #endif
@@ -266,7 +267,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
                        large = m->isTrue(temp);
-                       
+            
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
@@ -277,6 +278,11 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                                else {  splitmethod = temp; }
                        }
                        
+            temp = validParameter.validFile(parameters, "classic", false);                     if (temp == "not found") { temp = "F"; }
+                       classic = m->isTrue(temp);
+            
+            if ((splitmethod != "fasta") && classic) { m->mothurOut("splitmethod must be fasta to use cluster.classic.\n"); abort=true; }
+
                        temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "0.25"; }
                        m->mothurConvert(temp, cutoff); 
                        cutoff += (5 / (precision * 10.0));  
@@ -374,7 +380,7 @@ int ClusterSplitCommand::execute(){
                SplitMatrix* split;
                if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
                else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
-               else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
+               else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, classic, outputDir);     }
                else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
                
                split->split();
@@ -1021,148 +1027,20 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                //cluster each distance file
                for (int i = 0; i < distNames.size(); i++) {
             
-            Cluster* cluster = NULL;
-            SparseMatrix* matrix = NULL;
-            ListVector* list = NULL;
-            ListVector oldList;
-            RAbundVector* rabund = NULL;
-            
-                       if (m->control_pressed) { return listFileNames; }
-                       
                        string thisNamefile = distNames[i].begin()->second;
                        string thisDistFile = distNames[i].begin()->first;
-                                               
-                       #ifdef USE_MPI
-                               int pid;
-                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-                               
-                               //output your files too
-                               if (pid != 0) {
-                                       cout << endl << "Reading " << thisDistFile << endl;
-                               }
-                       #endif
-                       
-                       m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
-                       
-                       ReadMatrix* read = new ReadColumnMatrix(thisDistFile);  
-                       read->setCutoff(cutoff);
-
-                       NameAssignment* nameMap = new NameAssignment(thisNamefile);
-                       nameMap->readMap();
-                       read->read(nameMap);
-                       
-                       if (m->control_pressed) {  delete read; delete nameMap; return listFileNames; }
-                       
-                       list = read->getListVector();
-                       oldList = *list;
-                       matrix = read->getMatrix();
-                       
-                       delete read;  read = NULL;
-                       delete nameMap; nameMap = NULL;
-                       
-                       
-                       #ifdef USE_MPI
-                               //output your files too
-                               if (pid != 0) {
-                                       cout << endl << "Clustering " << thisDistFile << endl;
-                               }
-                       #endif
                        
-                       m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
-               
-                       rabund = new RAbundVector(list->getRAbundVector());
-                       
-                       //create cluster
-                       if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
-                       else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
-                       else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
-                       tag = cluster->getTag();
-               
-                       if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
-                       fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
-                       
-                       ofstream listFile;
-                       m->openOutputFile(fileroot+ tag + ".list",      listFile);
-               
-                       listFileNames.push_back(fileroot+ tag + ".list");
-                               
-                       float previousDist = 0.00000;
-                       float rndPreviousDist = 0.00000;
-                       
-                       oldList = *list;
-
-                       print_start = true;
-                       start = time(NULL);
-                       double saveCutoff = cutoff;
-               
-                       while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
-               
-                               if (m->control_pressed) { //clean up
-                                       delete matrix; delete list;     delete cluster; delete rabund;
-                                       listFile.close();
-                                       for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
-                                       listFileNames.clear(); return listFileNames;
-                               }
-               
-                               cluster->update(saveCutoff);
-       
-                               float dist = matrix->getSmallDist();
-                               float rndDist;
-                               if (hard) {
-                                       rndDist = m->ceilDist(dist, precision); 
-                               }else{
-                                       rndDist = m->roundDist(dist, precision); 
-                               }
-
-                               if(previousDist <= 0.0000 && dist != previousDist){
-                                       oldList.setLabel("unique");
-                                       oldList.print(listFile);
-                                       if (labels.count("unique") == 0) {  labels.insert("unique");  }
-                               }
-                               else if(rndDist != rndPreviousDist){
-                                       oldList.setLabel(toString(rndPreviousDist,  length-1));
-                                       oldList.print(listFile);
-                                       if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
-                               }
-               
-                               previousDist = dist;
-                               rndPreviousDist = rndDist;
-                               oldList = *list;
-                       }
+                       string listFileName = "";
+            if (classic)    {  listFileName = clusterClassicFile(thisDistFile, thisNamefile, labels, smallestCutoff);   }
+            else            {  listFileName = clusterFile(thisDistFile, thisNamefile, labels, smallestCutoff);          }
 
-               
-                       if(previousDist <= 0.0000){
-                               oldList.setLabel("unique");
-                               oldList.print(listFile);
-                               if (labels.count("unique") == 0) { labels.insert("unique"); }
-                       }
-                       else if(rndPreviousDist<cutoff){
-                               oldList.setLabel(toString(rndPreviousDist,  length-1));
-                               oldList.print(listFile);
-                               if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
-                       }
-       
-                       delete matrix; delete list;     delete cluster; delete rabund; 
-            matrix = NULL; list = NULL; cluster = NULL; rabund = NULL;
-                       listFile.close();
-                       
                        if (m->control_pressed) { //clean up
                                for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
                                listFileNames.clear(); return listFileNames;
                        }
-                       
-                       m->mothurRemove(thisDistFile);
-                       m->mothurRemove(thisNamefile);
-                       
-                       if (saveCutoff != cutoff) { 
-                               if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
-                               else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
-                       
-                               m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();  
-                       }
-                       
-                       if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
-               }
+            
+            listFileNames.push_back(listFileName);
+        }
                
                cutoff = smallestCutoff;
                                        
@@ -1177,6 +1055,269 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
 
 }
 //**********************************************************************************************************************
+string ClusterSplitCommand::clusterClassicFile(string thisDistFile, string thisNamefile, set<string>& labels, double& smallestCutoff){
+       try {
+        string listFileName = "";
+        
+        ListVector* list = NULL;
+        ListVector oldList;
+        RAbundVector* rabund = NULL;
+        
+#ifdef USE_MPI
+        int pid;
+        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+        
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Reading " << thisDistFile << endl;
+        }
+#endif
+
+        m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
+        
+        NameAssignment* nameMap = new NameAssignment(thisNamefile);
+        nameMap->readMap();
+                               
+               //reads phylip file storing data in 2D vector, also fills list and rabund
+        bool sim = false;
+               ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim);
+               cluster->readPhylipFile(thisDistFile, nameMap);
+               tag = cluster->getTag();
+        
+               if (m->control_pressed) { delete cluster; return 0; }
+               
+               list = cluster->getListVector();
+               rabund = cluster->getRAbundVector();
+        
+               if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
+               fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
+        listFileName = fileroot+ tag + ".list";
+        
+        ofstream listFile;
+               m->openOutputFile(fileroot+ tag + ".list",      listFile);
+               
+               float previousDist = 0.00000;
+               float rndPreviousDist = 0.00000;
+               oldList = *list;
+               
+#ifdef USE_MPI
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Clustering " << thisDistFile << endl;
+        }
+#endif
+
+        m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
+        
+               while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 1)){
+                       if (m->control_pressed) { delete cluster; delete list; delete rabund; listFile.close();  return listFileName;  }
+            
+                       cluster->update(cutoff);
+            
+                       float dist = cluster->getSmallDist();
+                       float rndDist;
+                       if (hard) {
+                               rndDist = m->ceilDist(dist, precision); 
+                       }else{
+                               rndDist = m->roundDist(dist, precision); 
+                       }
+            
+            if(previousDist <= 0.0000 && dist != previousDist){
+                oldList.setLabel("unique");
+                oldList.print(listFile);
+                if (labels.count("unique") == 0) {  labels.insert("unique");  }
+            }
+            else if(rndDist != rndPreviousDist){
+                oldList.setLabel(toString(rndPreviousDist,  length-1));
+                oldList.print(listFile);
+                if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+            }
+
+            
+                       previousDist = dist;
+                       rndPreviousDist = rndDist;
+                       oldList = *list;
+               }
+        
+               if(previousDist <= 0.0000){
+            oldList.setLabel("unique");
+            oldList.print(listFile);
+            if (labels.count("unique") == 0) { labels.insert("unique"); }
+        }
+        else if(rndPreviousDist<cutoff){
+            oldList.setLabel(toString(rndPreviousDist,  length-1));
+            oldList.print(listFile);
+            if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+        }
+
+        
+               listFile.close();
+               
+               delete cluster; delete nameMap; delete list; delete rabund;
+
+        
+        return listFileName;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "clusterClassicFile");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+string ClusterSplitCommand::clusterFile(string thisDistFile, string thisNamefile, set<string>& labels, double& smallestCutoff){
+       try {
+        string listFileName = "";
+        
+        Cluster* cluster = NULL;
+        SparseMatrix* matrix = NULL;
+        ListVector* list = NULL;
+        ListVector oldList;
+        RAbundVector* rabund = NULL;
+        
+        if (m->control_pressed) { return listFileName; }
+        
+#ifdef USE_MPI
+        int pid;
+        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+        
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Reading " << thisDistFile << endl;
+        }
+#endif
+        
+        m->mothurOutEndLine(); m->mothurOut("Reading " + thisDistFile); m->mothurOutEndLine();
+        
+        ReadMatrix* read = new ReadColumnMatrix(thisDistFile);         
+        read->setCutoff(cutoff);
+        
+        NameAssignment* nameMap = new NameAssignment(thisNamefile);
+        nameMap->readMap();
+        read->read(nameMap);
+        
+        if (m->control_pressed) {  delete read; delete nameMap; return listFileName; }
+        
+        list = read->getListVector();
+        oldList = *list;
+        matrix = read->getMatrix();
+        
+        delete read;  read = NULL;
+        delete nameMap; nameMap = NULL;
+        
+        
+#ifdef USE_MPI
+        //output your files too
+        if (pid != 0) {
+            cout << endl << "Clustering " << thisDistFile << endl;
+        }
+#endif
+        
+        m->mothurOutEndLine(); m->mothurOut("Clustering " + thisDistFile); m->mothurOutEndLine();
+               
+        rabund = new RAbundVector(list->getRAbundVector());
+        
+        //create cluster
+        if (method == "furthest")      {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
+        else if(method == "nearest"){  cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
+        else if(method == "average"){  cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
+        tag = cluster->getTag();
+               
+        if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
+        fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
+        
+        ofstream listFile;
+        m->openOutputFile(fileroot+ tag + ".list",     listFile);
+               
+        listFileName = fileroot+ tag + ".list";
+        
+        float previousDist = 0.00000;
+        float rndPreviousDist = 0.00000;
+        
+        oldList = *list;
+        
+        print_start = true;
+        start = time(NULL);
+        double saveCutoff = cutoff;
+               
+        while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){
+            
+            if (m->control_pressed) { //clean up
+                delete matrix; delete list;    delete cluster; delete rabund;
+                listFile.close();
+                m->mothurRemove(listFileName);         
+                return listFileName;
+            }
+            
+            cluster->update(saveCutoff);
+            
+            float dist = matrix->getSmallDist();
+            float rndDist;
+            if (hard) {
+                rndDist = m->ceilDist(dist, precision); 
+            }else{
+                rndDist = m->roundDist(dist, precision); 
+            }
+            
+            if(previousDist <= 0.0000 && dist != previousDist){
+                oldList.setLabel("unique");
+                oldList.print(listFile);
+                if (labels.count("unique") == 0) {  labels.insert("unique");  }
+            }
+            else if(rndDist != rndPreviousDist){
+                oldList.setLabel(toString(rndPreviousDist,  length-1));
+                oldList.print(listFile);
+                if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+            }
+            
+            previousDist = dist;
+            rndPreviousDist = rndDist;
+            oldList = *list;
+        }
+        
+               
+        if(previousDist <= 0.0000){
+            oldList.setLabel("unique");
+            oldList.print(listFile);
+            if (labels.count("unique") == 0) { labels.insert("unique"); }
+        }
+        else if(rndPreviousDist<cutoff){
+            oldList.setLabel(toString(rndPreviousDist,  length-1));
+            oldList.print(listFile);
+            if (labels.count(toString(rndPreviousDist,  length-1)) == 0) { labels.insert(toString(rndPreviousDist,  length-1)); }
+        }
+        
+        delete matrix; delete list;    delete cluster; delete rabund; 
+        matrix = NULL; list = NULL; cluster = NULL; rabund = NULL;
+        listFile.close();
+        
+        if (m->control_pressed) { //clean up
+            m->mothurRemove(listFileName);     
+            return listFileName;
+        }
+        
+        m->mothurRemove(thisDistFile);
+        m->mothurRemove(thisNamefile);
+        
+        if (saveCutoff != cutoff) { 
+            if (hard)  {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
+            else               {       saveCutoff = m->roundDist(saveCutoff, precision);  }
+                       
+            m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();  
+        }
+        
+        if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
+        
+        return listFileName;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "clusterFile");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 
 int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) {
        try{
index 0e32ffaea7710189859f458de4adc897362e12ef..a82d02270ecc37f1c85e93c9fe51b70e29288081 100644 (file)
@@ -23,6 +23,7 @@
 #include "readmatrix.hpp"
 #include "inputdata.h"
 #include "clustercommand.h"
+#include "clusterclassic.h"
 
 class ClusterSplitCommand : public Command {
        
@@ -48,13 +49,15 @@ private:
        string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, distfile, format, showabund, timing, splitmethod, taxFile, fastafile;
        double cutoff, splitcutoff;
        int precision, length, processors, taxLevelCutoff;
-       bool print_start, abort, hard, large;
+       bool print_start, abort, hard, large, classic;
        time_t start;
        ofstream outList, outRabund, outSabund;
        
        void printData(ListVector*);
        vector<string> createProcesses(vector< map<string, string> >, set<string>&);
        vector<string> cluster(vector< map<string, string> >, set<string>&);
+    string clusterFile(string, string, set<string>&, double&);
+    string clusterClassicFile(string, string, set<string>&, double&);
        int mergeLists(vector<string>, map<float, int>, ListVector*);
        map<float, int> completeListFile(vector<string>, string, set<string>&, ListVector*&);
        int createMergedDistanceFile(vector< map<string, string> >);
index d70c10a597d265c631665c7ab61ab9ea1ce940a7..e73d60b3683acb5b8b3d25ea583be045354adce7 100644 (file)
 #include "createdatabasecommand.h"
 #include "makebiomcommand.h"
 #include "getcoremicrobiomecommand.h"
+#include "listotulabelscommand.h"
 
 /*******************************************************/
 
@@ -284,7 +285,8 @@ CommandFactory::CommandFactory(){
     commands["pcr.seqs"]            = "pcr.seqs";
     commands["create.database"]     = "create.database";
     commands["make.biom"]           = "make.biom";
-    commands["get.coremicrobiome"]   = "get.coremicrobiome";
+    commands["get.coremicrobiome"]  = "get.coremicrobiome";
+    commands["list.otulabels"]      = "list.otulabels";
        commands["quit"]                                = "MPIEnabled"; 
 
 }
@@ -496,6 +498,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "create.database")       {      command = new CreateDatabaseCommand(optionString);          }
         else if(commandName == "make.biom")             {      command = new MakeBiomCommand(optionString);                }
         else if(commandName == "get.coremicrobiome")    {      command = new GetCoreMicroBiomeCommand(optionString);       }
+        else if(commandName == "list.otulabels")        {      command = new ListOtuLabelsCommand(optionString);           }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -648,6 +651,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
         else if(commandName == "create.database")       {      pipecommand = new CreateDatabaseCommand(optionString);          }
         else if(commandName == "make.biom")             {      pipecommand = new MakeBiomCommand(optionString);                }
         else if(commandName == "get.coremicrobiome")    {      pipecommand = new GetCoreMicroBiomeCommand(optionString);       }
+        else if(commandName == "list.otulabels")        {      pipecommand = new ListOtuLabelsCommand(optionString);           }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -786,6 +790,7 @@ Command* CommandFactory::getCommand(string commandName){
         else if(commandName == "create.database")       {      shellcommand = new CreateDatabaseCommand();         }
         else if(commandName == "make.biom")             {      shellcommand = new MakeBiomCommand();               }
         else if(commandName == "get.coremicrobiome")    {      shellcommand = new GetCoreMicroBiomeCommand();      }
+        else if(commandName == "list.otulabels")        {      shellcommand = new ListOtuLabelsCommand();          }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
diff --git a/listotulabelscommand.cpp b/listotulabelscommand.cpp
new file mode 100644 (file)
index 0000000..becfae3
--- /dev/null
@@ -0,0 +1,387 @@
+//
+//  listotucommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 5/15/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "listotulabelscommand.h"
+#include "inputdata.h"
+
+//**********************************************************************************************************************
+vector<string> ListOtuLabelsCommand::setParameters(){  
+       try {
+        CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(pshared);
+               CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(prelabund);
+        CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
+               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
+        //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string ListOtuLabelsCommand::getHelpString(){  
+       try {
+               string helpString = "";
+               helpString += "The list.labels lists otu labels from shared or relabund file. The results can be used by the get.labels to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n";
+               helpString += "The list.labels parameters are: shared, relabund, label and groups.\n";
+               helpString += "The label parameter is used to analyze specific labels in your input.\n";
+               helpString += "The groups parameter allows you to specify which of the groups you would like analyzed.\n";
+               helpString += "The list.labels commmand should be in the following format: \n";
+               helpString += "list.otulabels(shared=yourSharedFile, groups=yourGroup1-yourGroup2)\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+ListOtuLabelsCommand::ListOtuLabelsCommand(){  
+       try {
+               abort = true; calledHelp = true;
+               setParameters();
+        vector<string> tempOutNames;
+               outputTypes["otulabels"] = tempOutNames; 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "ListOtuLabelsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+ListOtuLabelsCommand::ListOtuLabelsCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;   
+               allLines = 1;
+        
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       //valid paramters for this command
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                
+                //edit file types below to include only the types you added as parameters
+                
+                               string path;
+                it = parameters.find("relabund");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
+                               }
+                
+                it = parameters.find("shared");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
+            }
+            
+            vector<string> tempOutNames;
+            outputTypes["otulabels"] = tempOutNames; 
+            
+                       //check for parameters
+            sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { abort = true; }
+                       else if (sharedfile == "not found") { sharedfile = ""; }
+                       else { inputFileName = sharedfile; format = "sharedfile"; m->setSharedFile(sharedfile); }
+                       
+                       relabundfile = validParameter.validFile(parameters, "relabund", true);
+                       if (relabundfile == "not open") { abort = true; }
+                       else if (relabundfile == "not found") { relabundfile = ""; }
+                       else { inputFileName = relabundfile; format = "relabund"; m->setRelAbundFile(relabundfile); }
+            
+            if ((relabundfile == "") && (sharedfile == "")) { 
+                               //is there are current file available for either of these?
+                               //give priority to shared, then relabund
+                               sharedfile = m->getSharedFile(); 
+                               if (sharedfile != "") {  inputFileName = sharedfile; format="sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                               else { 
+                                       relabundfile = m->getRelAbundFile(); 
+                                       if (relabundfile != "") {  inputFileName = relabundfile; format="relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
+                                       else { 
+                                               m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine(); 
+                                               abort = true;
+                                       }
+                               }
+                       }
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = m->hasPath(inputFileName); //if user entered a file with a path then preserve it    
+                       }
+            
+            string groups = validParameter.validFile(parameters, "groups", false);                     
+                       if (groups == "not found") { groups = ""; }
+                       else { m->splitAtDash(groups, Groups); }
+                       m->setGroups(Groups);
+            
+            string label = validParameter.validFile(parameters, "label", false);                       
+                       if (label == "not found") { label = ""; }
+                       else { 
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }                       
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "ListOtuLabelsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int ListOtuLabelsCommand::execute(){
+       try {
+               
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        InputData input(inputFileName, format);
+        
+        if (format == "relabund") {
+            vector<SharedRAbundFloatVector*> lookup = input.getSharedRAbundFloatVectors();
+            string lastLabel = lookup[0]->getLabel();
+            
+            //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+            set<string> processedLabels;
+            set<string> userLabels = labels;
+            
+            //as long as you are not at the end of the file or done wih the lines you want
+            while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+                
+                if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+                
+                if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                 
+                    
+                    m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                    
+                    createList(lookup);
+                    
+                    processedLabels.insert(lookup[0]->getLabel());
+                    userLabels.erase(lookup[0]->getLabel());
+                }
+                
+                if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                    string saveLabel = lookup[0]->getLabel();
+                    
+                    for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
+                    lookup = input.getSharedRAbundFloatVectors(lastLabel);
+                    m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                    
+                    createList(lookup);
+                    
+                    processedLabels.insert(lookup[0]->getLabel());
+                    userLabels.erase(lookup[0]->getLabel());
+                    
+                    //restore real lastlabel to save below
+                    lookup[0]->setLabel(saveLabel);
+                }
+                
+                lastLabel = lookup[0]->getLabel();
+                //prevent memory leak
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+                
+                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
+                
+                //get next line to process
+                lookup = input.getSharedRAbundFloatVectors();                          
+            }
+            
+            if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
+            
+            //output error messages about any remaining user labels
+            set<string>::iterator it;
+            bool needToRun = false;
+            for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                m->mothurOut("Your file does not include the label " + *it); 
+                if (processedLabels.count(lastLabel) != 1) {
+                    m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                    needToRun = true;
+                }else {
+                    m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                }
+            }
+            
+            //run last label if you need to
+            if (needToRun == true)  {
+                for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
+                lookup = input.getSharedRAbundFloatVectors(lastLabel);
+                
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                createList(lookup);
+                
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+            }
+        }else {
+            
+            vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+            string lastLabel = lookup[0]->getLabel();
+            
+            //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+            set<string> processedLabels;
+            set<string> userLabels = labels;
+            
+            //as long as you are not at the end of the file or done wih the lines you want
+            while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+                
+                if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+                
+                if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                 
+                    
+                    m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                    
+                    createList(lookup);
+                    
+                    processedLabels.insert(lookup[0]->getLabel());
+                    userLabels.erase(lookup[0]->getLabel());
+                }
+                
+                if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                    string saveLabel = lookup[0]->getLabel();
+                    
+                    for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
+                    lookup = input.getSharedRAbundVectors(lastLabel);
+                    m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                    
+                    createList(lookup);
+                    
+                    processedLabels.insert(lookup[0]->getLabel());
+                    userLabels.erase(lookup[0]->getLabel());
+                    
+                    //restore real lastlabel to save below
+                    lookup[0]->setLabel(saveLabel);
+                }
+                
+                lastLabel = lookup[0]->getLabel();
+                //prevent memory leak
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+                
+                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
+                
+                //get next line to process
+                lookup = input.getSharedRAbundVectors();                               
+            }
+            
+            if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
+            
+            //output error messages about any remaining user labels
+            set<string>::iterator it;
+            bool needToRun = false;
+            for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                m->mothurOut("Your file does not include the label " + *it); 
+                if (processedLabels.count(lastLabel) != 1) {
+                    m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                    needToRun = true;
+                }else {
+                    m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                }
+            }
+            
+            //run last label if you need to
+            if (needToRun == true)  {
+                for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
+                lookup = input.getSharedRAbundVectors(lastLabel);
+                
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                createList(lookup);
+                
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+            }
+        }
+        
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
+        
+        //output files created by command
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+        return 0;
+               
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int ListOtuLabelsCommand::createList(vector<SharedRAbundVector*>& lookup){
+       try {
+        
+        string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + ".otu.labels";
+        outputNames.push_back(outputFileName);  outputTypes["otulabels"].push_back(outputFileName);
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        
+        for (int i = 0; i < m->currentBinLabels.size(); i++) {  out << m->currentBinLabels[i] << endl;  }
+        
+        out.close();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "createTable");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+int ListOtuLabelsCommand::createList(vector<SharedRAbundFloatVector*>& lookup){
+       try {
+        
+        string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + ".otu.labels";
+        outputNames.push_back(outputFileName);  outputTypes["accnos"].push_back(outputFileName);
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        
+        for (int i = 0; i < m->currentBinLabels.size(); i++) {  out << m->currentBinLabels[i] << endl;  }
+        
+        out.close();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ListOtuLabelsCommand", "createTable");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
diff --git a/listotulabelscommand.h b/listotulabelscommand.h
new file mode 100644 (file)
index 0000000..ea41539
--- /dev/null
@@ -0,0 +1,54 @@
+#ifndef Mothur_listotucommand_h
+#define Mothur_listotucommand_h
+
+//
+//  listotucommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 5/15/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+
+#include "command.hpp"
+#include "sharedrabundvector.h"
+
+/**************************************************************************************************/
+
+class ListOtuLabelsCommand : public Command {
+public:
+    ListOtuLabelsCommand(string);
+    ListOtuLabelsCommand();
+    ~ListOtuLabelsCommand(){}
+    
+    vector<string> setParameters();
+    string getCommandName()                    { return "list.otulabels";          }
+    string getCommandCategory()                { return "OTU-Based Approaches";        } 
+    //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
+    string getHelpString();    
+    string getCitation() { return "http://www.mothur.org/wiki/List.otulabels"; }
+    string getDescription()            { return "lists otu labels from shared or relabund file. Can be used with output from classify.otu, otu.association, or corr.axes to select specific otus."; }
+    
+    int execute(); 
+    void help() { m->mothurOut(getHelpString()); }     
+    
+private:
+    bool abort, allLines;
+    string outputDir, sharedfile, relabundfile, label, inputFileName, format;
+    vector<string> outputNames;
+    vector<string> Groups;
+    set<string> labels;
+    
+    int createList(vector<SharedRAbundFloatVector*>&);
+    int createList(vector<SharedRAbundVector*>&);
+
+};
+
+/**************************************************************************************************/
+
+
+
+
+
+
+#endif
diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp
new file mode 100644 (file)
index 0000000..697521c
--- /dev/null
@@ -0,0 +1,401 @@
+//
+//  makecontigscommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 5/15/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "makecontigscommand.h"
+
+//**********************************************************************************************************************
+vector<string> MakeContigsCommand::setParameters(){    
+       try {
+               CommandParameter pfasta("ffastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
+        CommandParameter prfasta("rfastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(prfasta);
+               CommandParameter palign("align", "Multiple", "needleman-gotoh-blast-noalign", "needleman", "", "", "",false,false); parameters.push_back(palign);
+               CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
+               CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
+               CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
+               CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string MakeContigsCommand::getHelpString(){    
+       try {
+               string helpString = "";
+               helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n";
+               helpString += "The make.contigs command parameters are ffastq, rfastq, align, match, mismatch, gapopen, gapextend and processors.\n";
+               helpString += "The ffastq and rfastq parameter is required.\n";
+               helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n";
+               helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
+               helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
+               helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
+               helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
+        helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
+        helpString += "The make.contigs command should be in the following format: \n";
+               helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
+               helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "getHelpString");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+MakeContigsCommand::MakeContigsCommand(){      
+       try {
+               abort = true; calledHelp = true; 
+               setParameters();
+               vector<string> tempOutNames;
+               outputTypes["fasta"] = tempOutNames;
+               outputTypes["qfile"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+MakeContigsCommand::MakeContigsCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;   
+        
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser.getParameters(); 
+                       
+                       ValidParameters validParameter("pairwise.seqs");
+                       map<string, string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["fasta"] = tempOutNames;
+                       outputTypes["qfile"] = tempOutNames;
+                       
+            
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else { 
+                               string path;
+                it = parameters.find("ffastq");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["ffastq"] = inputDir + it->second;           }
+                               }
+                
+                it = parameters.find("rfastq");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["rfastq"] = inputDir + it->second;           }
+                               }
+            }
+            
+            ffastqfile = validParameter.validFile(parameters, "ffastq", true);
+                       if (ffastqfile == "not open") { ffastqfile = ""; abort = true; }        
+                       else if (ffastqfile == "not found") { ffastqfile = ""; abort=true;  m->mothurOut("The ffastq parameter is required.\n"); }
+                       
+                       rfastqfile = validParameter.validFile(parameters, "rfastq", true);
+                       if (rfastqfile == "not open") { rfastqfile = ""; abort = true; }        
+                       else if (rfastqfile == "not found") { rfastqfile = ""; abort=true;  m->mothurOut("The rfastq parameter is required.\n"); }
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(ffastqfile);             }
+                       
+
+                       //check for optional parameter and set defaults
+                       // ...at some point should added some additional type checking...
+                       string temp;
+                       temp = validParameter.validFile(parameters, "match", false);            if (temp == "not found"){       temp = "1.0";                   }
+                       m->mothurConvert(temp, match);  
+                       
+                       temp = validParameter.validFile(parameters, "mismatch", false);         if (temp == "not found"){       temp = "-1.0";                  }
+                       m->mothurConvert(temp, misMatch);  
+            if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; }
+                       
+                       temp = validParameter.validFile(parameters, "gapopen", false);          if (temp == "not found"){       temp = "-2.0";                  }
+                       m->mothurConvert(temp, gapOpen);  
+            if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; }
+                       
+                       temp = validParameter.validFile(parameters, "gapextend", false);        if (temp == "not found"){       temp = "-1.0";                  }
+                       m->mothurConvert(temp, gapExtend); 
+            if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; }
+                       
+                       temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
+                       m->mothurConvert(temp, processors);
+                       
+                       align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
+                       if ((align != "needleman") && (align != "blast") && (align != "gotoh") && (align != "noalign")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman, blast, gotoh and noalign. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
+        }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int MakeContigsCommand::execute(){
+       try {
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        //read ffastq and rfastq files creating fasta and qual files.
+        //this function will create a forward and reverse, fasta and qual files for each processor.
+        //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual
+        int numReads = 0;
+        m->mothurOut("Reading fastq data..."); cout.flush();
+        vector< vector<string> > files = readFastqFiles(numReads);  
+        m->mothurOut("Done.\n");
+        
+        
+       
+        
+        string currentFasta = "";
+               itTypes = outputTypes.find("fasta");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
+               }
+        
+        string currentQual = "";
+               itTypes = outputTypes.find("qfile");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { currentQual = (itTypes->second)[0]; m->setQualFile(currentQual); }
+               }
+               
+        //output files created by command
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
+    try {
+        vector< vector<string> > files;
+        
+        //maps processors number to file pointer
+        map<int, vector<ofstream*> > tempfiles;  //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual
+        map<int, vector<ofstream*> >::iterator it;
+        
+        //create files to write to
+        for (int i = 0; i < processors; i++) {
+            vector<ofstream*> temp;
+            ofstream* outFF = new ofstream;     temp.push_back(outFF);
+            ofstream* outFQ = new ofstream;     temp.push_back(outFQ);
+            ofstream* outRF = new ofstream;     temp.push_back(outRF);
+            ofstream* outRQ = new ofstream;     temp.push_back(outRQ);
+            tempfiles[i] = temp;
+            
+            vector<string> names;
+            string ffastafilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "ffasta.temp";
+            string rfastafilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rfasta.temp";
+            string fqualfilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "fqual.temp";
+            string rqualfilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rqual.temp";
+            names.push_back(ffastafilename); names.push_back(fqualfilename);
+            names.push_back(rfastafilename); names.push_back(rqualfilename);
+            files.push_back(names);
+            
+            m->openOutputFile(ffastafilename, *outFF);
+            m->openOutputFile(rfastafilename, *outRF);
+            m->openOutputFile(fqualfilename, *outFQ);
+            m->openOutputFile(rqualfilename, *outRQ);
+        }
+        
+        if (m->control_pressed) {
+            //close files, delete ofstreams
+            for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
+            //remove files
+            for (int i = 0; i < files.size(); i++) {  
+                for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }
+            }
+        }
+        
+        ifstream inForward;
+        m->openInputFile(ffastqfile, inForward);
+        
+        ifstream inReverse;
+        m->openInputFile(rfastqfile, inReverse);
+        
+        count = 0;
+        while ((!inForward.eof()) && (!inReverse.eof())) {
+            
+            if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
+            
+            //get a read from forward and reverse fastq files
+            fastqRead fread = readFastq(inForward);
+            fastqRead rread = readFastq(inReverse);
+            checkReads(fread, rread);
+            
+            if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) {  for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; }
+            
+            //if the reads are okay write to output files
+            int process = count % processors;
+            
+            *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl;
+            *(tempfiles[process][1]) << ">" << fread.name << endl;
+            for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; }
+            *(tempfiles[process][1]) << endl;
+            *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl;
+            *(tempfiles[process][3]) << ">" << rread.name << endl;
+            for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; }
+            *(tempfiles[process][3]) << endl;
+            
+            count++;
+            
+            //report progress
+                       if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+                       
+               }
+               //report progress
+               if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+               
+
+        
+        //close files, delete ofstreams
+        for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
+        inForward.close();
+        inReverse.close();
+        
+        //adjust for really large processors or really small files
+        if (count < processors) { 
+            for (int i = count; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); }
+            files.resize(count);
+            processors = count; 
+        }
+        
+        return files;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeContigsCommand", "readFastqFiles");
+        exit(1);
+    }
+}
+
+//**********************************************************************************************************************
+fastqRead MakeContigsCommand::readFastq(ifstream& in){
+    try {
+        fastqRead read;
+        
+        //read sequence name
+        string name = m->getline(in); m->gobble(in);
+        if (name == "") {  m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        else { name = name.substr(1); }
+        
+        //read sequence
+        string sequence = m->getline(in); m->gobble(in);
+        if (sequence == "") {  m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        
+        //read sequence name
+        string name2 = m->getline(in); m->gobble(in);
+        if (name2 == "") {  m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        else { name2 = name2.substr(1);  }
+        
+        //read quality scores
+        string quality = m->getline(in); m->gobble(in);
+        if (quality == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        
+        //sanity check sequence length and number of quality scores match
+        if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; return read; } }
+        if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; return read; }
+        
+        vector<int> qualScores;
+               int controlChar = int('@');
+               for (int i = 0; i < quality.length(); i++) { 
+                       int temp = int(quality[i]);
+                       temp -= controlChar;
+                       
+                       qualScores.push_back(temp);
+               }
+
+        read.name = name;
+        read.sequence = sequence;
+        read.scores = qualScores;
+
+        return read;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeContigsCommand", "readFastq");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse){
+    try {
+        bool good = true;
+        
+        //fix names
+        if ((forward.name.length() > 2) && (reverse.name.length() > 2)) {
+            forward.name = forward.name.substr(0, forward.name.length()-2);
+            reverse.name = reverse.name.substr(0, reverse.name.length()-2);
+        }else { good = false; m->control_pressed = true; }
+        
+        //do names match
+        if (forward.name != reverse.name) {
+            m->mothurOut("[ERROR]: read " + forward.name + " from " + ffastqfile + ", but read " + reverse.name + " from " + rfastqfile + ".\n");
+            good = false; m->control_pressed = true;
+        }
+        
+        //do sequence lengths match
+        if (forward.sequence.length() != reverse.sequence.length()) {
+            m->mothurOut("[ERROR]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffastqfile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfastqfile + ".\n");
+            good = false; m->control_pressed = true;
+        }
+        
+        //do number of qual scores match 
+        if (forward.scores.size() != reverse.scores.size()) {
+            m->mothurOut("[ERROR]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffastqfile + ", but read  " + toString(reverse.scores.size()) + " quality scores from " + rfastqfile + ".\n");
+            good = false; m->control_pressed = true;
+        }
+
+        return good;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeContigsCommand", "readFastq");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+
+
+
+
diff --git a/makecontigscommand.h b/makecontigscommand.h
new file mode 100644 (file)
index 0000000..e63695e
--- /dev/null
@@ -0,0 +1,63 @@
+#ifndef Mothur_makecontigscommand_h
+#define Mothur_makecontigscommand_h
+
+//
+//  makecontigscommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 5/15/12.
+//  Copyright (c) 2012 Schloss Lab. All rights reserved.
+//
+
+#include "command.hpp"
+
+struct fastqRead {
+       vector<int> scores;
+       string name;
+       string sequence;
+       
+       fastqRead() { name = ""; sequence = ""; scores.clear(); };
+       fastqRead(string n, string s, vector<int> sc) : name(n), sequence(s), scores(sc) {};
+       ~fastqRead() {};
+};
+
+/**************************************************************************************************/
+
+class MakeContigsCommand : public Command {
+public:
+    MakeContigsCommand(string);
+    MakeContigsCommand();
+    ~MakeContigsCommand(){}
+    
+    vector<string> setParameters();
+    string getCommandName()                    { return "make.contigs";                        }
+    string getCommandCategory()                { return "Sequence Processing";         } 
+    //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
+    string getHelpString();    
+    string getCitation() { return "http://www.mothur.org/wiki/Make.contigs"; }
+    string getDescription()            { return "description"; }
+    
+    int execute(); 
+    void help() { m->mothurOut(getHelpString()); }     
+    
+private:
+    bool abort;
+    string outputDir, ffastqfile, rfastqfile, align;
+       float match, misMatch, gapOpen, gapExtend;
+       int processors, longestBase;
+    vector<string> outputNames;
+    
+    fastqRead readFastq(ifstream&);
+    vector< vector<string> > readFastqFiles(int&);
+    bool checkReads(fastqRead&, fastqRead&);
+};
+
+/**************************************************************************************************/
+
+
+
+
+
+
+
+#endif
index 8e1a7f52882fe1cf8e3b12a60538915306dacb46..0f6e030bb0d1fec3f7cf99614fa0827c66de2bb5 100644 (file)
@@ -201,7 +201,7 @@ string MakeFastQCommand::convertQual(vector<int> qual) {
        try {
                string qualScores;
                
-               int controlChar = int('!');
+               int controlChar = int('@');
                
                for (int i = 0; i < qual.size(); i++) { 
                        int temp = qual[i] + controlChar;
index e6d7ce6d1aa30da836bb88f2833bd666ae11bd85..06ea359e6740bd822691e9f8ab6e3e53454cc724 100644 (file)
@@ -216,7 +216,7 @@ vector<int> ParseFastaQCommand::convertQual(string qual) {
        try {
                vector<int> qualScores;
                
-               int controlChar = int('!');
+               int controlChar = int('@');
                
                for (int i = 0; i < qual.length(); i++) { 
                        int temp = int(qual[i]);
index dabcd0469461f25a79ac707cf0e785c19974cc1e..59e49a79e9e702cfcf70c47ffee3a42da8a0b537 100644 (file)
@@ -25,7 +25,7 @@ SplitMatrix::SplitMatrix(string distfile, string name, string tax, float c, stri
 }
 /***********************************************************************/
 
-SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float cu, string t, int p, string output){
+SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float cu, string t, int p, bool cl, string output){
        m = MothurOut::getInstance();
        fastafile = ffile;
        namefile = name;
@@ -34,6 +34,7 @@ SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, float c
        distCutoff = cu; //for fasta method if you are creating distance matrix you need a cutoff for that
        method = t;
        processors = p;
+    classic = cl;
        outputDir = output;
 }
 
@@ -185,7 +186,9 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                //process each distance file
                for (int i = 0; i < numGroups; i++) { 
                        
-                       string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(distCutoff);
+                       string options = "";
+            if (classic) { options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", output=lt"; }
+            else { options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(distCutoff); }
                        if (outputDir != "") { options += ", outputdir=" + outputDir; }
                        
                        Command* command = new DistanceCommand(options);
@@ -229,7 +232,9 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                for(int i=0;i<numGroups;i++){
                        string tempNameFile = namefile + "." + toString(i) + ".temp";
                        if (outputDir == "") { outputDir = m->hasPath(fastafile); }
-                       string tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist";
+                       string tempDistFile = "";
+            if (classic) { tempDistFile =  outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "phylip.dist";}
+            else { tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist"; }
 
                        //if there are valid distances
                        ifstream fileHandle;
index 4ff316073789b694916a75fae5d2081a813d96c7..b8aa55119c810871988e76ca156b1746bd4eeb4a 100644 (file)
@@ -20,7 +20,7 @@ class SplitMatrix  {
        public:
 
                SplitMatrix(string, string, string, float, string, bool); //column formatted distance file, namesfile, cutoff, method, large
-               SplitMatrix(string, string, string, float, float, string, int, string); //fastafile, namefile, taxFile, taxcutoff, cutoff, method, processors, outputDir
+               SplitMatrix(string, string, string, float, float, string, int, bool, string); //fastafile, namefile, taxFile, taxcutoff, cutoff, method, processors, classic, outputDir
                
                ~SplitMatrix();
                int split();
@@ -33,7 +33,7 @@ class SplitMatrix  {
                string distFile, namefile, singleton, method, taxFile, fastafile, outputDir;
                vector< map< string, string> > dists;
                float cutoff, distCutoff;
-               bool large;
+               bool large, classic;
                int processors;
                                
                int splitDistance();