]> git.donarmstrong.com Git - mothur.git/blobdiff - mgclustercommand.cpp
added warning about merging with something above cutoff to cluster. working on chimeras
[mothur.git] / mgclustercommand.cpp
index d5b84a8505bd554dca154c2c4e90db10a401c352..d240de6c73c98a705472e5cb50c689041a7b36dc 100644 (file)
@@ -8,7 +8,6 @@
  */
 
 #include "mgclustercommand.h"
-#include "readcluster.h"
 
 //**********************************************************************************************************************
 MGClusterCommand::MGClusterCommand(string option){
@@ -21,24 +20,54 @@ MGClusterCommand::MGClusterCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"blast", "method", "name", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge"};
+                       string Array[] =  {"blast", "method", "name", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
                        map<string, string> parameters = parser.getParameters();
                        
                        ValidParameters validParameter;
+                       map<string,string>::iterator it;
                
                        //check to make sure all parameters are valid for command
-                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("blast");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["blast"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("name");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
+                       }
+
+                       
                        //check for required parameters
                        blastfile = validParameter.validFile(parameters, "blast", true);
                        if (blastfile == "not open") { abort = true; }  
                        else if (blastfile == "not found") { blastfile = ""; }
                        
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += hasPath(blastfile); //if user entered a file with a path then preserve it  
+                       }
+                       
                        namefile = validParameter.validFile(parameters, "name", true);
                        if (namefile == "not open") { abort = true; }   
                        else if (namefile == "not found") { namefile = ""; }
@@ -121,7 +150,7 @@ int MGClusterCommand::execute(){
                        nameMap->readMap();
                }else{ nameMap= new NameAssignment(); }
                
-               string fileroot = getRootName(blastfile);
+               string fileroot = outputDir + getRootName(getSimpleName(blastfile));
                string tag = "";
                time_t start;
                float previousDist = 0.00000;
@@ -138,6 +167,15 @@ int MGClusterCommand::execute(){
                start = time(NULL);
                oldList = *list;
                
+               if (method == "furthest")               { tag = "fn";  }
+               else if (method == "nearest")   { tag = "nn";  }
+               else                                                    { tag = "an";  }
+               
+               //open output files
+               openOutputFile(fileroot+ tag + ".list",  listFile);
+               openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
+               openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
+               
                if (!hclusterWanted) {
                        //get distmatrix and overlap
                        SparseMatrix* distMatrix = read->getDistMatrix();
@@ -145,21 +183,15 @@ int MGClusterCommand::execute(){
                        delete read;
                
                        //create cluster
-                       if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix); }
-                       else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix); }
-                       else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix); }
-                       tag = cluster->getTag();
+                       if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
+                       else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
+                       else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
                        cluster->setMapWanted(true);
                        
-                       //open output files
-                       openOutputFile(fileroot+ tag + ".list",  listFile);
-                       openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
-                       openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
-                       
                        //cluster using cluster classes
                        while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
                                
-                               cluster->update();
+                               cluster->update(cutoff);
                                float dist = distMatrix->getSmallDist();
                                float rndDist = roundDist(dist, precision);
                                
@@ -208,7 +240,6 @@ int MGClusterCommand::execute(){
                        delete cluster;
                        
                }else { //use hcluster to cluster
-                       tag = "fn";
                        //get distmatrix and overlap
                        overlapFile = read->getOverlapFile();
                        distFile = read->getDistFile(); 
@@ -218,24 +249,16 @@ int MGClusterCommand::execute(){
                        sortHclusterFiles(distFile, overlapFile);
                
                        //create cluster
-                       hcluster = new HCluster(rabund, list);
+                       hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
                        hcluster->setMapWanted(true);
                        
-                       //open output files
-                       openOutputFile(fileroot+ tag + ".list",  listFile);
-                       openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
-                       openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
-                       
-                       vector<DistNode> seqs; seqs.resize(1); // to start loop
-                       exitedBreak = false;  //lets you know if there is a distance stored in next
-                       
-                       ifstream inHcluster;
-                       openInputFile(distFile, inHcluster);
+                       vector<seqDist> seqs; seqs.resize(1); // to start loop
+                       //ifstream inHcluster;
+                       //openInputFile(distFile, inHcluster);
 
                        while (seqs.size() != 0){
                
-                               seqs = getSeqs(inHcluster);
-                               random_shuffle(seqs.begin(), seqs.end());
+                               seqs = hcluster->getSeqs();
                                
                                for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
                                        
@@ -268,7 +291,7 @@ int MGClusterCommand::execute(){
                                        }
                                }
                        }
-                       inHcluster.close();
+                       //inHcluster.close();
                        
                        if(previousDist <= 0.0000){
                                oldList.setLabel("unique");
@@ -345,7 +368,7 @@ ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
                while (!done) {
                        
                        //get next overlap
-                       DistNode overlapNode;
+                       seqDist overlapNode;
                        if (!hclusterWanted) {  
                                if (count < overlapMatrix.size()) { //do we have another node in the matrix
                                        overlapNode = overlapMatrix[count];
@@ -415,89 +438,21 @@ ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
 void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
        try {
                //sort distFile
-               ReadCluster* readCluster = new ReadCluster(unsortedDist, cutoff);       
-               readCluster->setFormat("column");
-               readCluster->read(nameMap);
-               string sortedDistFile = readCluster->getOutputFile();
-               ListVector* extralist = readCluster->getListVector();  //we get this so we can delete it and not have a memory leak
-               
+               string sortedDistFile = sortFile(unsortedDist);
                remove(unsortedDist.c_str());  //delete unsorted file
                distFile = sortedDistFile;
-               delete extralist;
-               delete readCluster;
                
                //sort overlap file
-               readCluster = new ReadCluster(unsortedOverlap, cutoff);         
-               readCluster->setFormat("column");
-               readCluster->read(nameMap);
-               string sortedOverlapFile = readCluster->getOutputFile();
-               extralist = readCluster->getListVector();  //we get this so we can delete it and not have a memory leak
-               
+               string sortedOverlapFile = sortFile(unsortedOverlap);
                remove(unsortedOverlap.c_str());  //delete unsorted file
                overlapFile = sortedOverlapFile;
-               delete extralist;
-               delete readCluster;
        }
        catch(exception& e) {
                errorOut(e, "MGClusterCommand", "sortHclusterFiles");
                exit(1);
        }
 }
-//**********************************************************************************************************************
-vector<DistNode> MGClusterCommand::getSeqs(ifstream& filehandle){
-       try {
-               string firstName, secondName;
-               float distance, prevDistance;
-               vector<DistNode> sameSeqs;
-               prevDistance = -1;
-       
-               //if you are not at the beginning of the file
-               if (exitedBreak) { 
-                       sameSeqs.push_back(next);
-                       prevDistance = next.dist;
-                       exitedBreak = false;
-               }
-       
-               //get entry
-               while (!filehandle.eof()) {
-                       
-                       filehandle >> firstName >> secondName >> distance;    gobble(filehandle); 
-       
-                       //save first one
-                       if (prevDistance == -1) { prevDistance = distance; }
-                       
-                       map<string,int>::iterator itA = nameMap->find(firstName);
-                       map<string,int>::iterator itB = nameMap->find(secondName);
-                       if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
-                       if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
-               
-                       //using cutoff
-                       if (distance > cutoff) { break; }
-               
-                       if (distance != -1) { //-1 means skip me
-                       
-                               //are the distances the same
-                               if (distance == prevDistance) { //save in vector
-                                       DistNode temp(itA->second, itB->second, distance);
-                                       sameSeqs.push_back(temp);
-                                       exitedBreak = false;
-                               }else{ 
-                                       next.seq1 = itA->second;
-                                       next.seq2 = itB->second;
-                                       next.dist = distance;
-                                       exitedBreak = true;
-                                       break;
-                               }
-                       }
-               }
-               
-               return sameSeqs;
-       }
-       catch(exception& e) {
-               errorOut(e, "MGClusterCommand", "getSeqs");
-               exit(1);
-       }
-}
+
 //**********************************************************************************************************************