]> git.donarmstrong.com Git - mothur.git/blobdiff - mgclustercommand.cpp
Merge remote-tracking branch 'origin/master'
[mothur.git] / mgclustercommand.cpp
index 5a790afd45bbfcc2c92d23827aa58a184f257cee..4ca0cdfb9af8f71338be56a9649ec4c6f8a07a87 100644 (file)
@@ -14,6 +14,7 @@ vector<string> MGClusterCommand::setParameters(){
        try {
                CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
                CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
                CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
                CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
                CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
@@ -61,6 +62,28 @@ string MGClusterCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){       
+       try {
+        string outputFileName = "";
+               map<string, vector<string> >::iterator it;
+        
+        //is this a type this command creates
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+        else {
+            if (type == "list") {  outputFileName =  "list"; }
+            else if (type == "rabund") {  outputFileName =  "rabund"; }
+            else if (type == "sabund") {  outputFileName =  "sabund"; }
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
+        }
+        return outputFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 MGClusterCommand::MGClusterCommand(){  
        try {
                abort = true; calledHelp = true; 
@@ -141,17 +164,21 @@ MGClusterCommand::MGClusterCommand(string option) {
                        namefile = validParameter.validFile(parameters, "name", true);
                        if (namefile == "not open") { abort = true; }   
                        else if (namefile == "not found") { namefile = ""; }
+                       else { m->setNameFile(namefile); }
                        
                        if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
                        
                        //check for optional parameter and set defaults
                        string temp;
+            temp = validParameter.validFile(parameters, "large", false);                       if (temp == "not found") { temp = "false"; }            
+                       large = m->isTrue(temp); 
+            
                        temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
                        precisionLength = temp.length();
-                       convert(temp, precision); 
+                       m->mothurConvert(temp, precision); 
                        
                        temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "0.70"; }
-                       convert(temp, cutoff); 
+                       m->mothurConvert(temp, cutoff); 
                        cutoff += (5 / (precision * 10.0));
                        
                        method = validParameter.validFile(parameters, "method", false);
@@ -161,10 +188,10 @@ MGClusterCommand::MGClusterCommand(string option) {
                        else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
 
                        temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
-                       convert(temp, length); 
+                       m->mothurConvert(temp, length); 
                        
                        temp = validParameter.validFile(parameters, "penalty", false);                  if (temp == "not found") { temp = "0.10"; }
-                       convert(temp, penalty); 
+                       m->mothurConvert(temp, penalty); 
                        
                        temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
                        minWanted = m->isTrue(temp); 
@@ -176,7 +203,7 @@ MGClusterCommand::MGClusterCommand(string option) {
                        hclusterWanted = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
-                       hard = m->isTrue(temp);
+                       hard = m->isTrue(temp);            
                }
 
        }
@@ -188,7 +215,6 @@ MGClusterCommand::MGClusterCommand(string option) {
 //**********************************************************************************************************************
 int MGClusterCommand::execute(){
        try {
-               
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                //read names file
@@ -201,15 +227,27 @@ int MGClusterCommand::execute(){
                string tag = "";
                time_t start;
                float previousDist = 0.00000;
-               float rndPreviousDist = 0.00000;
-               
+               float rndPreviousDist = 0.00000; 
+        
                //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
                //must remember to delete those objects here since readBlast does not
                read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
                read->read(nameMap);
-               
-               list = new ListVector(nameMap->getListVector());
-               RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
+        
+        list = new ListVector(nameMap->getListVector());
+        RAbundVector* rabund = NULL;
+        
+        if(large) {
+            map<string, int> nameMapCounts = m->readNames(namefile);
+            createRabund(nameMapCounts);
+            rabund = &rav;
+        }else {
+            rabund = new RAbundVector(list->getRAbundVector());
+        }
+        
+                
+               //list = new ListVector(nameMap->getListVector());
+               //rabund = new RAbundVector(list->getRAbundVector());
                
                if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
                
@@ -222,18 +260,23 @@ int MGClusterCommand::execute(){
                else if (method == "nearest")   { tag = "nn";  }
                else                                                    { tag = "an";  }
                
-               //open output files
-               m->openOutputFile(fileroot+ tag + ".list",  listFile);
-               m->openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
-               m->openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
+        string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
+        string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
+        string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
+        
+               m->openOutputFile(sabundFileName,       sabundFile);
+               m->openOutputFile(rabundFileName,       rabundFile);
+               m->openOutputFile(listFileName, listFile);
                
                if (m->control_pressed) { 
                        delete nameMap; delete read; delete list; delete rabund; 
-                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                        outputTypes.clear();
                        return 0; 
                }
                
+               double saveCutoff = cutoff;
+               
                if (!hclusterWanted) {
                        //get distmatrix and overlap
                        SparseMatrix* distMatrix = read->getDistMatrix();
@@ -250,7 +293,7 @@ int MGClusterCommand::execute(){
                        
                        if (m->control_pressed) { 
                                delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
-                               listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                outputTypes.clear();
                                return 0; 
                        }
@@ -262,7 +305,7 @@ int MGClusterCommand::execute(){
                                
                                if (m->control_pressed) { 
                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
-                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                        outputTypes.clear();
                                        return 0; 
                                }
@@ -285,7 +328,7 @@ int MGClusterCommand::execute(){
                                                
                                                if (m->control_pressed) { 
                                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                                        outputTypes.clear();
                                                        return 0; 
                                                }
@@ -316,7 +359,7 @@ int MGClusterCommand::execute(){
                                        
                                        if (m->control_pressed) { 
                                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                                        outputTypes.clear();
                                                        return 0; 
                                        }
@@ -346,7 +389,7 @@ int MGClusterCommand::execute(){
                        
                        if (m->control_pressed) { 
                                delete nameMap;  delete list; delete rabund; 
-                               listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                outputTypes.clear();
                                return 0; 
                        }
@@ -363,7 +406,7 @@ int MGClusterCommand::execute(){
                        
                        if (m->control_pressed) { 
                                delete nameMap;  delete list; delete rabund; delete hcluster;
-                               listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                outputTypes.clear();
                                return 0; 
                        }
@@ -372,11 +415,16 @@ int MGClusterCommand::execute(){
                
                                seqs = hcluster->getSeqs();
                                
+                               //to account for cutoff change in average neighbor
+                               if (seqs.size() != 0) {
+                                       if (seqs[0].dist > cutoff) { break; }
+                               }
+                               
                                if (m->control_pressed) { 
                                        delete nameMap;  delete list; delete rabund; delete hcluster;
-                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
-                                       remove(distFile.c_str());
-                                       remove(overlapFile.c_str());
+                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                       m->mothurRemove(distFile);
+                                       m->mothurRemove(overlapFile);
                                        outputTypes.clear();
                                        return 0; 
                                }
@@ -385,13 +433,13 @@ int MGClusterCommand::execute(){
                                        
                                        if (seqs[i].seq1 != seqs[i].seq2) {
                
-                                               hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
+                                               cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
                                                
                                                if (m->control_pressed) { 
                                                        delete nameMap;  delete list; delete rabund; delete hcluster;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
-                                                       remove(distFile.c_str());
-                                                       remove(overlapFile.c_str());
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                                       m->mothurRemove(distFile);
+                                                       m->mothurRemove(overlapFile);
                                                        outputTypes.clear();
                                                        return 0; 
                                                }
@@ -413,9 +461,9 @@ int MGClusterCommand::execute(){
                                                                
                                                                if (m->control_pressed) { 
                                                                        delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
-                                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
-                                                                       remove(distFile.c_str());
-                                                                       remove(overlapFile.c_str());
+                                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                                                       m->mothurRemove(distFile);
+                                                                       m->mothurRemove(overlapFile);
                                                                        outputTypes.clear();
                                                                        return 0; 
                                                                }
@@ -449,9 +497,9 @@ int MGClusterCommand::execute(){
                                        
                                        if (m->control_pressed) { 
                                                        delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
-                                                       remove(distFile.c_str());
-                                                       remove(overlapFile.c_str());
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                                       m->mothurRemove(distFile);
+                                                       m->mothurRemove(overlapFile);
                                                        outputTypes.clear();
                                                        return 0; 
                                        }
@@ -466,30 +514,37 @@ int MGClusterCommand::execute(){
                        }
                        
                        delete hcluster;
-                       remove(distFile.c_str());
-                       remove(overlapFile.c_str());
+                       m->mothurRemove(distFile);
+                       m->mothurRemove(overlapFile);
                }
                
-               delete list; 
-               delete rabund;
+               delete list;
+               if (!large) {delete rabund;}
                listFile.close();
                sabundFile.close();
                rabundFile.close();
        
                if (m->control_pressed) { 
                        delete nameMap; 
-                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                        outputTypes.clear();
                        return 0; 
                }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();   outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
-               m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
-               m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
+               m->mothurOut(listFileName); m->mothurOutEndLine();      outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
+               m->mothurOut(rabundFileName); m->mothurOutEndLine();    outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
+               m->mothurOut(sabundFileName); m->mothurOutEndLine();    outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
                m->mothurOutEndLine();
                
+               if (saveCutoff != cutoff) { 
+                       if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
+                       else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
+                       
+                       m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
+               }
+               
                //set list file as new current listfile
                string current = "";
                itTypes = outputTypes.find("list");
@@ -643,12 +698,12 @@ void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOve
        try {
                //sort distFile
                string sortedDistFile = m->sortFile(unsortedDist, outputDir);
-               remove(unsortedDist.c_str());  //delete unsorted file
+               m->mothurRemove(unsortedDist);  //delete unsorted file
                distFile = sortedDistFile;
                
                //sort overlap file
                string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
-               remove(unsortedOverlap.c_str());  //delete unsorted file
+               m->mothurRemove(unsortedOverlap);  //delete unsorted file
                overlapFile = sortedOverlapFile;
        }
        catch(exception& e) {
@@ -659,7 +714,22 @@ void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOve
 
 //**********************************************************************************************************************
 
+void MGClusterCommand::createRabund(map<string, int> nameMapCounts){
+    try {
+        //RAbundVector rav;
+        map<string,int>::iterator it;
+        //it = nameMapCounts.begin();
+        //for(int i = 0; i < list->getNumBins(); i++) {   rav.push_back((*it).second); it++;    }
+        for ( it=nameMapCounts.begin(); it!=nameMapCounts.end(); it++ ) {    rav.push_back( it->second );    }
+        //return rav;
+    }
+    catch(exception& e) {
+               m->errorOut(e, "MGClusterCommand", "createRabund");
+               exit(1);
+       }
+    
+}
 
-
+//**********************************************************************************************************************