git.donarmstrong.com Git - mothur.git/blobdiff - mgclustercommand.cpp
added forward and reverse barcodes to trim.seqs to process illumina seqs
[mothur.git] / mgclustercommand.cpp
index 10932bc065ca69f6546503e3c0d402571d27cdc2..9c45afe95277b5c7106e3f87f64e60e53b9bf321 100644 (file)
@@ -18,8 +18,8 @@ vector<string> MGClusterCommand::setParameters(){
                CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
                CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
                CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
                CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
                CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
                CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
-               CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "furthest", "", "", "",false,false); parameters.push_back(pmethod);
-               CommandParameter phard("hard", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phard);
+               CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
+               CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
                CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
                CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
                CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
                CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
                CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
                CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
@@ -44,7 +44,7 @@ string MGClusterCommand::getHelpString(){
                helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
                helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
                helpString += "The precision parameter's default value is 100. \n";
                helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
                helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
                helpString += "The precision parameter's default value is 100. \n";
-               helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n";      
+               helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
                helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
                helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
                helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
                helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
                helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
                helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
@@ -82,6 +82,7 @@ MGClusterCommand::MGClusterCommand(string option) {
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
                        vector<string> myArray = setParameters();
                
                else {
                        vector<string> myArray = setParameters();
@@ -140,6 +141,7 @@ MGClusterCommand::MGClusterCommand(string option) {
                        namefile = validParameter.validFile(parameters, "name", true);
                        if (namefile == "not open") { abort = true; }   
                        else if (namefile == "not found") { namefile = ""; }
                        namefile = validParameter.validFile(parameters, "name", true);
                        if (namefile == "not open") { abort = true; }   
                        else if (namefile == "not found") { namefile = ""; }
+                       else { m->setNameFile(namefile); }
                        
                        if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
                        
                        
                        if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
                        
@@ -147,23 +149,23 @@ MGClusterCommand::MGClusterCommand(string option) {
                        string temp;
                        temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
                        precisionLength = temp.length();
                        string temp;
                        temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
                        precisionLength = temp.length();
-                       convert(temp, precision); 
+                       m->mothurConvert(temp, precision); 
                        
                        temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
                        
                        temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
-                       convert(temp, cutoff); 
+                       m->mothurConvert(temp, cutoff); 
                        cutoff += (5 / (precision * 10.0));
                        
                        method = validParameter.validFile(parameters, "method", false);
                        cutoff += (5 / (precision * 10.0));
                        
                        method = validParameter.validFile(parameters, "method", false);
-                       if (method == "not found") { method = "furthest"; }
+                       if (method == "not found") { method = "average"; }
                        
                        if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
                        else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
 
                        temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
                        
                        if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
                        else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
 
                        temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
-                       convert(temp, length); 
+                       m->mothurConvert(temp, length); 
                        
                        temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
                        
                        temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
-                       convert(temp, penalty); 
+                       m->mothurConvert(temp, penalty); 
                        
                        temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
                        minWanted = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
                        minWanted = m->isTrue(temp); 
@@ -174,7 +176,7 @@ MGClusterCommand::MGClusterCommand(string option) {
                        temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
                        hclusterWanted = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
                        hclusterWanted = m->isTrue(temp); 
                        
-                       temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
+                       temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
                        hard = m->isTrue(temp);
                }
 
                        hard = m->isTrue(temp);
                }
 
@@ -228,11 +230,13 @@ int MGClusterCommand::execute(){
                
                if (m->control_pressed) { 
                        delete nameMap; delete read; delete list; delete rabund; 
                
                if (m->control_pressed) { 
                        delete nameMap; delete read; delete list; delete rabund; 
-                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                        outputTypes.clear();
                        return 0; 
                }
                
                        outputTypes.clear();
                        return 0; 
                }
                
+               double saveCutoff = cutoff;
+               
                if (!hclusterWanted) {
                        //get distmatrix and overlap
                        SparseMatrix* distMatrix = read->getDistMatrix();
                if (!hclusterWanted) {
                        //get distmatrix and overlap
                        SparseMatrix* distMatrix = read->getDistMatrix();
@@ -249,7 +253,7 @@ int MGClusterCommand::execute(){
                        
                        if (m->control_pressed) { 
                                delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
                        
                        if (m->control_pressed) { 
                                delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
-                               listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                outputTypes.clear();
                                return 0; 
                        }
                                outputTypes.clear();
                                return 0; 
                        }
@@ -261,7 +265,7 @@ int MGClusterCommand::execute(){
                                
                                if (m->control_pressed) { 
                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
                                
                                if (m->control_pressed) { 
                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
-                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                        outputTypes.clear();
                                        return 0; 
                                }
                                        outputTypes.clear();
                                        return 0; 
                                }
@@ -284,7 +288,7 @@ int MGClusterCommand::execute(){
                                                
                                                if (m->control_pressed) { 
                                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
                                                
                                                if (m->control_pressed) { 
                                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                                        outputTypes.clear();
                                                        return 0; 
                                                }
                                                        outputTypes.clear();
                                                        return 0; 
                                                }
@@ -315,7 +319,7 @@ int MGClusterCommand::execute(){
                                        
                                        if (m->control_pressed) { 
                                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
                                        
                                        if (m->control_pressed) { 
                                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                                        outputTypes.clear();
                                                        return 0; 
                                        }
                                                        outputTypes.clear();
                                                        return 0; 
                                        }
@@ -345,7 +349,7 @@ int MGClusterCommand::execute(){
                        
                        if (m->control_pressed) { 
                                delete nameMap;  delete list; delete rabund; 
                        
                        if (m->control_pressed) { 
                                delete nameMap;  delete list; delete rabund; 
-                               listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                outputTypes.clear();
                                return 0; 
                        }
                                outputTypes.clear();
                                return 0; 
                        }
@@ -362,7 +366,7 @@ int MGClusterCommand::execute(){
                        
                        if (m->control_pressed) { 
                                delete nameMap;  delete list; delete rabund; delete hcluster;
                        
                        if (m->control_pressed) { 
                                delete nameMap;  delete list; delete rabund; delete hcluster;
-                               listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                outputTypes.clear();
                                return 0; 
                        }
                                outputTypes.clear();
                                return 0; 
                        }
@@ -371,11 +375,16 @@ int MGClusterCommand::execute(){
                
                                seqs = hcluster->getSeqs();
                                
                
                                seqs = hcluster->getSeqs();
                                
+                               //to account for cutoff change in average neighbor
+                               if (seqs.size() != 0) {
+                                       if (seqs[0].dist > cutoff) { break; }
+                               }
+                               
                                if (m->control_pressed) { 
                                        delete nameMap;  delete list; delete rabund; delete hcluster;
                                if (m->control_pressed) { 
                                        delete nameMap;  delete list; delete rabund; delete hcluster;
-                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
-                                       remove(distFile.c_str());
-                                       remove(overlapFile.c_str());
+                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                       m->mothurRemove(distFile);
+                                       m->mothurRemove(overlapFile);
                                        outputTypes.clear();
                                        return 0; 
                                }
                                        outputTypes.clear();
                                        return 0; 
                                }
@@ -384,13 +393,13 @@ int MGClusterCommand::execute(){
                                        
                                        if (seqs[i].seq1 != seqs[i].seq2) {
                
                                        
                                        if (seqs[i].seq1 != seqs[i].seq2) {
                
-                                               hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
+                                               cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
                                                
                                                if (m->control_pressed) { 
                                                        delete nameMap;  delete list; delete rabund; delete hcluster;
                                                
                                                if (m->control_pressed) { 
                                                        delete nameMap;  delete list; delete rabund; delete hcluster;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
-                                                       remove(distFile.c_str());
-                                                       remove(overlapFile.c_str());
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                                       m->mothurRemove(distFile);
+                                                       m->mothurRemove(overlapFile);
                                                        outputTypes.clear();
                                                        return 0; 
                                                }
                                                        outputTypes.clear();
                                                        return 0; 
                                                }
@@ -412,9 +421,9 @@ int MGClusterCommand::execute(){
                                                                
                                                                if (m->control_pressed) { 
                                                                        delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
                                                                
                                                                if (m->control_pressed) { 
                                                                        delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
-                                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
-                                                                       remove(distFile.c_str());
-                                                                       remove(overlapFile.c_str());
+                                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                                                       m->mothurRemove(distFile);
+                                                                       m->mothurRemove(overlapFile);
                                                                        outputTypes.clear();
                                                                        return 0; 
                                                                }
                                                                        outputTypes.clear();
                                                                        return 0; 
                                                                }
@@ -448,9 +457,9 @@ int MGClusterCommand::execute(){
                                        
                                        if (m->control_pressed) { 
                                                        delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
                                        
                                        if (m->control_pressed) { 
                                                        delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
-                                                       remove(distFile.c_str());
-                                                       remove(overlapFile.c_str());
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                                       m->mothurRemove(distFile);
+                                                       m->mothurRemove(overlapFile);
                                                        outputTypes.clear();
                                                        return 0; 
                                        }
                                                        outputTypes.clear();
                                                        return 0; 
                                        }
@@ -465,8 +474,8 @@ int MGClusterCommand::execute(){
                        }
                        
                        delete hcluster;
                        }
                        
                        delete hcluster;
-                       remove(distFile.c_str());
-                       remove(overlapFile.c_str());
+                       m->mothurRemove(distFile);
+                       m->mothurRemove(overlapFile);
                }
                
                delete list; 
                }
                
                delete list; 
@@ -477,7 +486,7 @@ int MGClusterCommand::execute(){
        
                if (m->control_pressed) { 
                        delete nameMap; 
        
                if (m->control_pressed) { 
                        delete nameMap; 
-                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                        outputTypes.clear();
                        return 0; 
                }
                        outputTypes.clear();
                        return 0; 
                }
@@ -489,6 +498,13 @@ int MGClusterCommand::execute(){
                m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
                m->mothurOutEndLine();
                
                m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
                m->mothurOutEndLine();
                
+               if (saveCutoff != cutoff) { 
+                       if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
+                       else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
+                       
+                       m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
+               }
+               
                //set list file as new current listfile
                string current = "";
                itTypes = outputTypes.find("list");
                //set list file as new current listfile
                string current = "";
                itTypes = outputTypes.find("list");
@@ -642,12 +658,12 @@ void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOve
        try {
                //sort distFile
                string sortedDistFile = m->sortFile(unsortedDist, outputDir);
        try {
                //sort distFile
                string sortedDistFile = m->sortFile(unsortedDist, outputDir);
-               remove(unsortedDist.c_str());  //delete unsorted file
+               m->mothurRemove(unsortedDist);  //delete unsorted file
                distFile = sortedDistFile;
                
                //sort overlap file
                string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
                distFile = sortedDistFile;
                
                //sort overlap file
                string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
-               remove(unsortedOverlap.c_str());  //delete unsorted file
+               m->mothurRemove(unsortedOverlap);  //delete unsorted file
                overlapFile = sortedOverlapFile;
        }
        catch(exception& e) {
                overlapFile = sortedOverlapFile;
        }
        catch(exception& e) {