git.donarmstrong.com Git - mothur.git/blobdiff - mgclustercommand.cpp
changed hard parameter in cluster commands
[mothur.git] / mgclustercommand.cpp
index d240de6c73c98a705472e5cb50c689041a7b36dc..023f2142deb8584ad3794170b95e1b1d12889529 100644 (file)
@@ -10,7 +10,7 @@
 #include "mgclustercommand.h"
 
 //**********************************************************************************************************************
-MGClusterCommand::MGClusterCommand(string option){
+MGClusterCommand::MGClusterCommand(string option) {
        try {
                globaldata = GlobalData::getInstance();
                abort = false;
@@ -20,7 +20,7 @@ MGClusterCommand::MGClusterCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"blast", "method", "name", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
+                       string Array[] =  {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -72,7 +72,7 @@ MGClusterCommand::MGClusterCommand(string option){
                        if (namefile == "not open") { abort = true; }   
                        else if (namefile == "not found") { namefile = ""; }
                        
-                       if ((blastfile == "")) { mothurOut("When executing a mgcluster command you must provide a blastfile."); mothurOutEndLine(); abort = true; }
+                       if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
                        
                        //check for optional parameter and set defaults
                        string temp;
@@ -88,7 +88,7 @@ MGClusterCommand::MGClusterCommand(string option){
                        if (method == "not found") { method = "furthest"; }
                        
                        if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
-                       else { mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); mothurOutEndLine(); abort = true; }
+                       else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
 
                        temp = validParameter.validFile(parameters, "length", false);                   if (temp == "not found") { temp = "5"; }
                        convert(temp, length); 
@@ -104,11 +104,14 @@ MGClusterCommand::MGClusterCommand(string option){
                        
                        temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
                        hclusterWanted = isTrue(temp); 
+                       
+                       temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
+                       hard = isTrue(temp);
                }
 
        }
        catch(exception& e) {
-               errorOut(e, "MGClusterCommand", "MGClusterCommand");
+               m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
                exit(1);
        }
 }
@@ -116,23 +119,23 @@ MGClusterCommand::MGClusterCommand(string option){
 
 void MGClusterCommand::help(){
        try {
-               mothurOut("The mgcluster command parameter options are blast, name, cutoff, precision, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n");
-               mothurOut("The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n");
-               mothurOut("This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n");
-               mothurOut("The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n");
-               mothurOut("The precision parameter's default value is 100. \n");
-               mothurOut("The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n");       
-               mothurOut("The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n");
-               mothurOut("The length parameter is used to specify the minimum overlap required.  The default is 5.\n");
-               mothurOut("The penalty parameter is used to adjust the error rate.  The default is 0.10.\n");
-               mothurOut("The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n");
-               mothurOut("The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n");
-               mothurOut("The mgcluster command should be in the following format: \n");
-               mothurOut("mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n");
-               mothurOut("Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n\n");
+               m->mothurOut("The mgcluster command parameter options are blast, name, cutoff, precision, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n");
+               m->mothurOut("The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n");
+               m->mothurOut("This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n");
+               m->mothurOut("The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n");
+               m->mothurOut("The precision parameter's default value is 100. \n");
+               m->mothurOut("The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n");    
+               m->mothurOut("The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n");
+               m->mothurOut("The length parameter is used to specify the minimum overlap required.  The default is 5.\n");
+               m->mothurOut("The penalty parameter is used to adjust the error rate.  The default is 0.10.\n");
+               m->mothurOut("The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n");
+               m->mothurOut("The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n");
+               m->mothurOut("The mgcluster command should be in the following format: \n");
+               m->mothurOut("mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n\n");
        }
        catch(exception& e) {
-               errorOut(e, "MGClusterCommand", "help");
+               m->errorOut(e, "MGClusterCommand", "help");
                exit(1);
        }
 }
@@ -164,6 +167,8 @@ int MGClusterCommand::execute(){
                list = new ListVector(nameMap->getListVector());
                RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
                
+               if (m->control_pressed) { delete nameMap; delete read; delete list; delete rabund; return 0; }
+               
                start = time(NULL);
                oldList = *list;
                
@@ -176,6 +181,12 @@ int MGClusterCommand::execute(){
                openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
                openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
                
+               if (m->control_pressed) { 
+                       delete nameMap; delete read; delete list; delete rabund; 
+                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                       return 0; 
+               }
+               
                if (!hclusterWanted) {
                        //get distmatrix and overlap
                        SparseMatrix* distMatrix = read->getDistMatrix();
@@ -188,12 +199,31 @@ int MGClusterCommand::execute(){
                        else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
                        cluster->setMapWanted(true);
                        
+                       if (m->control_pressed) { 
+                               delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
+                               listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               return 0; 
+                       }
+                       
                        //cluster using cluster classes
                        while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
                                
                                cluster->update(cutoff);
+                               
+                               if (m->control_pressed) { 
+                                       delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
+                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                       return 0; 
+                               }
+                               
                                float dist = distMatrix->getSmallDist();
-                               float rndDist = roundDist(dist, precision);
+                               float rndDist;
+                               if (hard) {
+                                       rndDist = ceilDist(dist, precision); 
+                               }else{
+                                       rndDist = roundDist(dist, precision); 
+                               }
+
                                
                                if(previousDist <= 0.0000 && dist != previousDist){
                                        oldList.setLabel("unique");
@@ -203,6 +233,13 @@ int MGClusterCommand::execute(){
                                        if (merge) {
                                                map<string, int> seq2Bin = cluster->getSeqtoBin();
                                                ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+                                               
+                                               if (m->control_pressed) { 
+                                                       delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                                       return 0; 
+                                               }
+                                               
                                                temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
                                                printData(temp);
                                                delete temp;
@@ -225,6 +262,13 @@ int MGClusterCommand::execute(){
                                if (merge) {
                                        map<string, int> seq2Bin = cluster->getSeqtoBin();
                                        ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+                                       
+                                       if (m->control_pressed) { 
+                                                       delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                                       return 0; 
+                                       }
+                                       
                                        temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
                                        printData(temp);
                                        delete temp;
@@ -247,6 +291,12 @@ int MGClusterCommand::execute(){
                
                        //sort the distance and overlap files
                        sortHclusterFiles(distFile, overlapFile);
+                       
+                       if (m->control_pressed) { 
+                               delete nameMap;  delete list; delete rabund; 
+                               listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               return 0; 
+                       }
                
                        //create cluster
                        hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
@@ -255,18 +305,45 @@ int MGClusterCommand::execute(){
                        vector<seqDist> seqs; seqs.resize(1); // to start loop
                        //ifstream inHcluster;
                        //openInputFile(distFile, inHcluster);
+                       
+                       if (m->control_pressed) { 
+                               delete nameMap;  delete list; delete rabund; delete hcluster;
+                               listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               return 0; 
+                       }
 
                        while (seqs.size() != 0){
                
                                seqs = hcluster->getSeqs();
                                
+                               if (m->control_pressed) { 
+                                       delete nameMap;  delete list; delete rabund; delete hcluster;
+                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                       remove(distFile.c_str());
+                                       remove(overlapFile.c_str());
+                                       return 0; 
+                               }
+                               
                                for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
                                        
                                        if (seqs[i].seq1 != seqs[i].seq2) {
                
                                                hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
+                                               
+                                               if (m->control_pressed) { 
+                                                       delete nameMap;  delete list; delete rabund; delete hcluster;
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                                       remove(distFile.c_str());
+                                                       remove(overlapFile.c_str());
+                                                       return 0; 
+                                               }
        
-                                               float rndDist = roundDist(seqs[i].dist, precision);
+                                               float rndDist;
+                                               if (hard) {
+                                                       rndDist = ceilDist(seqs[i].dist, precision); 
+                                               }else{
+                                                       rndDist = roundDist(seqs[i].dist, precision); 
+                                               }
                                                                                                
                                                if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
                                                        oldList.setLabel("unique");
@@ -276,6 +353,15 @@ int MGClusterCommand::execute(){
                                                        if (merge) {
                                                                map<string, int> seq2Bin = hcluster->getSeqtoBin();
                                                                ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+                                                               
+                                                               if (m->control_pressed) { 
+                                                                       delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
+                                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                                                       remove(distFile.c_str());
+                                                                       remove(overlapFile.c_str());
+                                                                       return 0; 
+                                                               }
+
                                                                temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
                                                                printData(temp);
                                                                delete temp;
@@ -301,6 +387,15 @@ int MGClusterCommand::execute(){
                                if (merge) {
                                        map<string, int> seq2Bin = hcluster->getSeqtoBin();
                                        ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+                                       
+                                       if (m->control_pressed) { 
+                                                       delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                                       remove(distFile.c_str());
+                                                       remove(overlapFile.c_str());
+                                                       return 0; 
+                                       }
+                                       
                                        temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
                                        printData(temp);
                                        delete temp;
@@ -323,13 +418,28 @@ int MGClusterCommand::execute(){
        
                globaldata->setListFile(fileroot+ tag + ".list");
                globaldata->setFormat("list");
-                       
-               mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); mothurOutEndLine();
+               
+               if (m->control_pressed) { 
+                       delete nameMap; 
+                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                       globaldata->setListFile("");
+                       globaldata->setFormat("");
+                       return 0; 
+               }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();   
+               m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); 
+               m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); 
+               m->mothurOutEndLine();
+               
+               m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
                        
                return 0;
        }
        catch(exception& e) {
-               errorOut(e, "MGClusterCommand", "execute");
+               m->errorOut(e, "MGClusterCommand", "execute");
                exit(1);
        }
 }
@@ -345,7 +455,7 @@ void MGClusterCommand::printData(ListVector* mergedList){
                sabund.print(sabundFile);
        }
        catch(exception& e) {
-               errorOut(e, "MGClusterCommand", "printData");
+               m->errorOut(e, "MGClusterCommand", "printData");
                exit(1);
        }
 }
@@ -366,6 +476,10 @@ ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
                }else { if (overlapMatrix.size() == 0)  {  done = true;  } } 
                
                while (!done) {
+                       if (m->control_pressed) { 
+                               if (hclusterWanted) {   inOverlap.close();  }           
+                               return newList;
+                       }
                        
                        //get next overlap
                        seqDist overlapNode;
@@ -430,7 +544,7 @@ ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
                                
        }
        catch(exception& e) {
-               errorOut(e, "MGClusterCommand", "mergeOPFs");
+               m->errorOut(e, "MGClusterCommand", "mergeOPFs");
                exit(1);
        }
 }
@@ -438,17 +552,17 @@ ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
        try {
                //sort distFile
-               string sortedDistFile = sortFile(unsortedDist);
+               string sortedDistFile = sortFile(unsortedDist, outputDir);
                remove(unsortedDist.c_str());  //delete unsorted file
                distFile = sortedDistFile;
                
                //sort overlap file
-               string sortedOverlapFile = sortFile(unsortedOverlap);
+               string sortedOverlapFile = sortFile(unsortedOverlap, outputDir);
                remove(unsortedOverlap.c_str());  //delete unsorted file
                overlapFile = sortedOverlapFile;
        }
        catch(exception& e) {
-               errorOut(e, "MGClusterCommand", "sortHclusterFiles");
+               m->errorOut(e, "MGClusterCommand", "sortHclusterFiles");
                exit(1);
        }
 }