git.donarmstrong.com Git - mothur.git/blobdiff - mgclustercommand.cpp
mods in testing 1.16.0
[mothur.git] / mgclustercommand.cpp
index f8f3c78e6fc68c507bcc896bf0789a2ba2d7e373..3ed9bf2131a45ac7a4b0ac44d959b16bfe28cd36 100644 (file)
@@ -9,6 +9,57 @@
 
 #include "mgclustercommand.h"
 
+
+//**********************************************************************************************************************
+vector<string> MGClusterCommand::getValidParameters(){ // Returns the parameter names listed in Array below, copied into a vector<string>.
+       try {
+               string Array[] =  {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string))); // sizeof ratio = element count, so the range covers the whole array
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MGClusterCommand", "getValidParameters"); // pass the exception plus class and function name to m->errorOut
+               exit(1); // no recovery: the process ends with status 1
+       }
+}
+//**********************************************************************************************************************
+MGClusterCommand::MGClusterCommand(){  // Default constructor: sets abort and seeds outputTypes; parses no options.
+       try {
+               abort = true; // NOTE(review): presumably marks this instance as not runnable - confirm against the header
+               //initialize outputTypes: every key below starts out mapped to an empty vector<string>
+               vector<string> tempOutNames;
+               outputTypes["list"] = tempOutNames;
+               outputTypes["rabund"] = tempOutNames;
+               outputTypes["sabund"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MGClusterCommand", "MGClusterCommand"); // pass the exception plus class and function name to m->errorOut
+               exit(1); // no recovery: the process ends with status 1
+       }
+}
+//**********************************************************************************************************************
+vector<string> MGClusterCommand::getRequiredParameters(){      // Returns a one-element vector<string> holding "blast".
+       try {
+               string Array[] =  {"blast"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string))); // sizeof ratio = element count (1 here)
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MGClusterCommand", "getRequiredParameters"); // pass the exception plus class and function name to m->errorOut
+               exit(1); // no recovery: the process ends with status 1
+       }
+}
+//**********************************************************************************************************************
+vector<string> MGClusterCommand::getRequiredFiles(){   // Returns an empty vector<string>; nothing is ever added to it.
+       try {
+               vector<string> myArray; // default-constructed, stays empty
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MGClusterCommand", "getRequiredFiles"); // pass the exception plus class and function name to m->errorOut
+               exit(1); // no recovery: the process ends with status 1
+       }
+}
 //**********************************************************************************************************************
 MGClusterCommand::MGClusterCommand(string option) {
        try {
@@ -34,6 +85,12 @@ MGClusterCommand::MGClusterCommand(string option) {
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["list"] = tempOutNames;
+                       outputTypes["rabund"] = tempOutNames;
+                       outputTypes["sabund"] = tempOutNames;
+                       
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
                        if (inputDir == "not found"){   inputDir = "";          }
@@ -42,7 +99,7 @@ MGClusterCommand::MGClusterCommand(string option) {
                                it = parameters.find("blast");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["blast"] = inputDir + it->second;            }
                                }
@@ -50,7 +107,7 @@ MGClusterCommand::MGClusterCommand(string option) {
                                it = parameters.find("name");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
@@ -65,7 +122,7 @@ MGClusterCommand::MGClusterCommand(string option) {
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
                                outputDir = ""; 
-                               outputDir += hasPath(blastfile); //if user entered a file with a path then preserve it  
+                               outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it       
                        }
                        
                        namefile = validParameter.validFile(parameters, "name", true);
@@ -97,16 +154,16 @@ MGClusterCommand::MGClusterCommand(string option) {
                        convert(temp, penalty); 
                        
                        temp = validParameter.validFile(parameters, "min", false);                              if (temp == "not found") { temp = "true"; }
-                       minWanted = isTrue(temp); 
+                       minWanted = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "merge", false);                    if (temp == "not found") { temp = "true"; }
-                       merge = isTrue(temp); 
+                       merge = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
-                       hclusterWanted = isTrue(temp); 
+                       hclusterWanted = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
-                       hard = isTrue(temp);
+                       hard = m->isTrue(temp);
                }
 
        }
@@ -153,7 +210,7 @@ int MGClusterCommand::execute(){
                        nameMap->readMap();
                }else{ nameMap= new NameAssignment(); }
                
-               string fileroot = outputDir + getRootName(getSimpleName(blastfile));
+               string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
                string tag = "";
                time_t start;
                float previousDist = 0.00000;
@@ -167,7 +224,7 @@ int MGClusterCommand::execute(){
                list = new ListVector(nameMap->getListVector());
                RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
                
-               if (m->control_pressed) { delete nameMap; delete read; delete list; delete rabund; return 0; }
+               if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
                
                start = time(NULL);
                oldList = *list;
@@ -179,13 +236,14 @@ int MGClusterCommand::execute(){
                else                                                    { tag = "an";  }
                
                //open output files
-               openOutputFile(fileroot+ tag + ".list",  listFile);
-               openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
-               openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
+               m->openOutputFile(fileroot+ tag + ".list",  listFile);
+               m->openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
+               m->openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
                
                if (m->control_pressed) { 
                        delete nameMap; delete read; delete list; delete rabund; 
                        listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                       outputTypes.clear();
                        return 0; 
                }
                
@@ -206,9 +264,10 @@ int MGClusterCommand::execute(){
                        if (m->control_pressed) { 
                                delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
                                listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               outputTypes.clear();
                                return 0; 
                        }
-                       
+               
                        //cluster using cluster classes
                        while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
                                
@@ -217,17 +276,17 @@ int MGClusterCommand::execute(){
                                if (m->control_pressed) { 
                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
                                        listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                       outputTypes.clear();
                                        return 0; 
                                }
                                
                                float dist = distMatrix->getSmallDist();
                                float rndDist;
                                if (hard) {
-                                       rndDist = ceilDist(dist, precision); 
+                                       rndDist = m->ceilDist(dist, precision); 
                                }else{
-                                       rndDist = roundDist(dist, precision); 
+                                       rndDist = m->roundDist(dist, precision); 
                                }
-
                                
                                if(previousDist <= 0.0000 && dist != previousDist){
                                        oldList.setLabel("unique");
@@ -235,12 +294,12 @@ int MGClusterCommand::execute(){
                                }
                                else if(rndDist != rndPreviousDist){
                                        if (merge) {
-                                               Seq2Bin = cluster->getSeqtoBin();
                                                ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
                                                
                                                if (m->control_pressed) { 
                                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
                                                        listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                                       outputTypes.clear();
                                                        return 0; 
                                                }
                                                
@@ -252,11 +311,11 @@ int MGClusterCommand::execute(){
                                                printData(&oldList);
                                        }
                                }
-                               
+       
                                previousDist = dist;
-       cout << "prev distance = " << previousDist << " dist = " << dist << endl;
                                rndPreviousDist = rndDist;
                                oldList = *list;
+                               Seq2Bin = cluster->getSeqtoBin();
                                oldSeq2Bin = Seq2Bin;
                        }
                        
@@ -266,12 +325,12 @@ int MGClusterCommand::execute(){
                        }
                        else if(rndPreviousDist<cutoff){
                                if (merge) {
-                                       Seq2Bin = cluster->getSeqtoBin();
                                        ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
                                        
                                        if (m->control_pressed) { 
                                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
                                                        listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                                       outputTypes.clear();
                                                        return 0; 
                                        }
                                        
@@ -301,6 +360,7 @@ int MGClusterCommand::execute(){
                        if (m->control_pressed) { 
                                delete nameMap;  delete list; delete rabund; 
                                listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               outputTypes.clear();
                                return 0; 
                        }
                
@@ -312,11 +372,12 @@ int MGClusterCommand::execute(){
                        
                        vector<seqDist> seqs; seqs.resize(1); // to start loop
                        //ifstream inHcluster;
-                       //openInputFile(distFile, inHcluster);
+                       //m->openInputFile(distFile, inHcluster);
                        
                        if (m->control_pressed) { 
                                delete nameMap;  delete list; delete rabund; delete hcluster;
                                listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               outputTypes.clear();
                                return 0; 
                        }
 
@@ -329,6 +390,7 @@ int MGClusterCommand::execute(){
                                        listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
                                        remove(distFile.c_str());
                                        remove(overlapFile.c_str());
+                                       outputTypes.clear();
                                        return 0; 
                                }
                                
@@ -343,14 +405,15 @@ int MGClusterCommand::execute(){
                                                        listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
                                                        remove(distFile.c_str());
                                                        remove(overlapFile.c_str());
+                                                       outputTypes.clear();
                                                        return 0; 
                                                }
        
                                                float rndDist;
                                                if (hard) {
-                                                       rndDist = ceilDist(seqs[i].dist, precision); 
+                                                       rndDist = m->ceilDist(seqs[i].dist, precision); 
                                                }else{
-                                                       rndDist = roundDist(seqs[i].dist, precision); 
+                                                       rndDist = m->roundDist(seqs[i].dist, precision); 
                                                }
                                                                                                
                                                if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
@@ -359,7 +422,6 @@ int MGClusterCommand::execute(){
                                                }
                                                else if((rndDist != rndPreviousDist)){
                                                        if (merge) {
-                                                               Seq2Bin = hcluster->getSeqtoBin();
                                                                ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
                                                                
                                                                if (m->control_pressed) { 
@@ -367,6 +429,7 @@ int MGClusterCommand::execute(){
                                                                        listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
                                                                        remove(distFile.c_str());
                                                                        remove(overlapFile.c_str());
+                                                                       outputTypes.clear();
                                                                        return 0; 
                                                                }
 
@@ -382,6 +445,7 @@ int MGClusterCommand::execute(){
                                                previousDist = seqs[i].dist;
                                                rndPreviousDist = rndDist;
                                                oldList = *list;
+                                               Seq2Bin = hcluster->getSeqtoBin(); //hcluster branch: take the seq-to-bin map from hcluster, the object this branch drives and deletes
                                                oldSeq2Bin = Seq2Bin;
                                        }
                                }
@@ -394,7 +458,6 @@ int MGClusterCommand::execute(){
                        }
                        else if(rndPreviousDist<cutoff){
                                if (merge) {
-                                       Seq2Bin = hcluster->getSeqtoBin();
                                        ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
                                        
                                        if (m->control_pressed) { 
@@ -402,6 +465,7 @@ int MGClusterCommand::execute(){
                                                        listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
                                                        remove(distFile.c_str());
                                                        remove(overlapFile.c_str());
+                                                       outputTypes.clear();
                                                        return 0; 
                                        }
                                        
@@ -415,8 +479,8 @@ int MGClusterCommand::execute(){
                        }
                        
                        delete hcluster;
-                       //remove(distFile.c_str());
-                       //remove(overlapFile.c_str());
+                       remove(distFile.c_str());
+                       remove(overlapFile.c_str());
                }
                
                delete list; 
@@ -433,14 +497,15 @@ int MGClusterCommand::execute(){
                        listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
                        globaldata->setListFile("");
                        globaldata->setFormat("");
+                       outputTypes.clear();
                        return 0; 
                }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();   
-               m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); 
-               m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); 
+               m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();   outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
+               m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
+               m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
                m->mothurOutEndLine();
                
                m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
@@ -475,12 +540,13 @@ ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
        try {
                //create new listvector so you don't overwrite the clustering
                ListVector* newList = new ListVector(oldList);
+
                bool done = false;
                ifstream inOverlap;
                int count = 0;
                
                if (hclusterWanted) {  
-                       openInputFile(overlapFile, inOverlap);  
+                       m->openInputFile(overlapFile, inOverlap);  
                        if (inOverlap.eof()) {  done = true;  }
                }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
                
@@ -501,15 +567,18 @@ ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
                                if (!inOverlap.eof()) {
                                        string firstName, secondName;
                                        float overlapDistance;
-                                       inOverlap >> firstName >> secondName >> overlapDistance; gobble(inOverlap);
+                                       inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
                                        
-                                       map<string,int>::iterator itA = nameMap->find(firstName);
-                                       map<string,int>::iterator itB = nameMap->find(secondName);
-                                       if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
-                                       if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
+                                       //commented out because we check this in readblast already
+                                       //map<string,int>::iterator itA = nameMap->find(firstName);
+                                       //map<string,int>::iterator itB = nameMap->find(secondName);
+                                       //if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
+                                       //if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
                                        
-                                       overlapNode.seq1 = itA->second;
-                                       overlapNode.seq2 = itB->second;
+                                       //overlapNode.seq1 = itA->second;
+                                       //overlapNode.seq2 = itB->second;
+                                       overlapNode.seq1 = nameMap->get(firstName);
+                                       overlapNode.seq2 = nameMap->get(secondName);
                                        overlapNode.dist = overlapDistance;
                                }else { inOverlap.close(); break; }
                        } 
@@ -520,21 +589,24 @@ ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
                                string name2 = nameMap->get(overlapNode.seq2);
                        
                                //use binInfo to find out if they are already in the same bin
-                               map<string, int>::iterator itBin1 = binInfo.find(name1);
-                               map<string, int>::iterator itBin2 = binInfo.find(name2);
+                               //map<string, int>::iterator itBin1 = binInfo.find(name1);
+                               //map<string, int>::iterator itBin2 = binInfo.find(name2);
                                
-                               if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
-                               if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
+                               //if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
+                               //if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
 
-                               int binKeep = itBin1->second;
-                               int binRemove = itBin2->second;
+                               //int binKeep = itBin1->second;
+                               //int binRemove = itBin2->second;
                                
+                               int binKeep = binInfo[name1];
+                               int binRemove = binInfo[name2];
+                       
                                //if not merge bins and update binInfo
                                if(binKeep != binRemove) {
                
                                        //save names in old bin
                                        string names = newList->get(binRemove);
-                                       
+               
                                        //merge bins into name1s bin
                                        newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
                                        newList->set(binRemove, "");    
@@ -568,12 +640,12 @@ ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
        try {
                //sort distFile
-               string sortedDistFile = sortFile(unsortedDist, outputDir);
+               string sortedDistFile = m->sortFile(unsortedDist, outputDir);
                remove(unsortedDist.c_str());  //delete unsorted file
                distFile = sortedDistFile;
                
                //sort overlap file
-               string sortedOverlapFile = sortFile(unsortedOverlap, outputDir);
+               string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
                remove(unsortedOverlap.c_str());  //delete unsorted file
                overlapFile = sortedOverlapFile;
        }