]> git.donarmstrong.com Git - mothur.git/blobdiff - mgclustercommand.cpp
fixed cluster.split with average method
[mothur.git] / mgclustercommand.cpp
index 6eb9c7d72f99590ec22b2f4b8aa758057a7f24f2..f8f3c78e6fc68c507bcc896bf0789a2ba2d7e373 100644 (file)
@@ -20,7 +20,7 @@ MGClusterCommand::MGClusterCommand(string option) {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"blast", "method", "name", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
+                       string Array[] =  {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -104,6 +104,9 @@ MGClusterCommand::MGClusterCommand(string option) {
                        
                        temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
                        hclusterWanted = isTrue(temp); 
+                       
+                       temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
+                       hard = isTrue(temp);
                }
 
        }
@@ -168,6 +171,8 @@ int MGClusterCommand::execute(){
                
                start = time(NULL);
                oldList = *list;
+               map<string, int> Seq2Bin;
+               map<string, int> oldSeq2Bin;
                
                if (method == "furthest")               { tag = "fn";  }
                else if (method == "nearest")   { tag = "nn";  }
@@ -195,6 +200,8 @@ int MGClusterCommand::execute(){
                        else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
                        else if(method == "average"){   cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
                        cluster->setMapWanted(true);
+                       Seq2Bin = cluster->getSeqtoBin();
+                       oldSeq2Bin = Seq2Bin;
                        
                        if (m->control_pressed) { 
                                delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
@@ -214,7 +221,13 @@ int MGClusterCommand::execute(){
                                }
                                
                                float dist = distMatrix->getSmallDist();
-                               float rndDist = roundDist(dist, precision);
+                               float rndDist;
+                               if (hard) {
+                                       rndDist = ceilDist(dist, precision); 
+                               }else{
+                                       rndDist = roundDist(dist, precision); 
+                               }
+
                                
                                if(previousDist <= 0.0000 && dist != previousDist){
                                        oldList.setLabel("unique");
@@ -222,8 +235,8 @@ int MGClusterCommand::execute(){
                                }
                                else if(rndDist != rndPreviousDist){
                                        if (merge) {
-                                               map<string, int> seq2Bin = cluster->getSeqtoBin();
-                                               ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+                                               Seq2Bin = cluster->getSeqtoBin();
+                                               ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
                                                
                                                if (m->control_pressed) { 
                                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
@@ -241,8 +254,10 @@ int MGClusterCommand::execute(){
                                }
                                
                                previousDist = dist;
+       cout << "prev distance = " << previousDist << " dist = " << dist << endl;
                                rndPreviousDist = rndDist;
                                oldList = *list;
+                               oldSeq2Bin = Seq2Bin;
                        }
                        
                        if(previousDist <= 0.0000){
@@ -251,8 +266,8 @@ int MGClusterCommand::execute(){
                        }
                        else if(rndPreviousDist<cutoff){
                                if (merge) {
-                                       map<string, int> seq2Bin = cluster->getSeqtoBin();
-                                       ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+                                       Seq2Bin = cluster->getSeqtoBin();
+                                       ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
                                        
                                        if (m->control_pressed) { 
                                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
@@ -292,6 +307,8 @@ int MGClusterCommand::execute(){
                        //create cluster
                        hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
                        hcluster->setMapWanted(true);
+                       Seq2Bin = cluster->getSeqtoBin();
+                       oldSeq2Bin = Seq2Bin;
                        
                        vector<seqDist> seqs; seqs.resize(1); // to start loop
                        //ifstream inHcluster;
@@ -329,7 +346,12 @@ int MGClusterCommand::execute(){
                                                        return 0; 
                                                }
        
-                                               float rndDist = roundDist(seqs[i].dist, precision);
+                                               float rndDist;
+                                               if (hard) {
+                                                       rndDist = ceilDist(seqs[i].dist, precision); 
+                                               }else{
+                                                       rndDist = roundDist(seqs[i].dist, precision); 
+                                               }
                                                                                                
                                                if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
                                                        oldList.setLabel("unique");
@@ -337,8 +359,8 @@ int MGClusterCommand::execute(){
                                                }
                                                else if((rndDist != rndPreviousDist)){
                                                        if (merge) {
-                                                               map<string, int> seq2Bin = hcluster->getSeqtoBin();
-                                                               ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+                                                               Seq2Bin = hcluster->getSeqtoBin();
+                                                               ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
                                                                
                                                                if (m->control_pressed) { 
                                                                        delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
@@ -360,6 +382,7 @@ int MGClusterCommand::execute(){
                                                previousDist = seqs[i].dist;
                                                rndPreviousDist = rndDist;
                                                oldList = *list;
+                                               oldSeq2Bin = Seq2Bin;
                                        }
                                }
                        }
@@ -371,8 +394,8 @@ int MGClusterCommand::execute(){
                        }
                        else if(rndPreviousDist<cutoff){
                                if (merge) {
-                                       map<string, int> seq2Bin = hcluster->getSeqtoBin();
-                                       ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+                                       Seq2Bin = hcluster->getSeqtoBin();
+                                       ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
                                        
                                        if (m->control_pressed) { 
                                                        delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
@@ -392,8 +415,8 @@ int MGClusterCommand::execute(){
                        }
                        
                        delete hcluster;
-                       remove(distFile.c_str());
-                       remove(overlapFile.c_str());
+                       //remove(distFile.c_str());
+                       //remove(overlapFile.c_str());
                }
                
                delete list; 
@@ -495,15 +518,22 @@ ListVector* MGClusterCommand::mergeOPFs(map<string, int> binInfo, float dist){
                                //get names of seqs that overlap
                                string name1 = nameMap->get(overlapNode.seq1);
                                string name2 = nameMap->get(overlapNode.seq2);
-                               
+                       
                                //use binInfo to find out if they are already in the same bin
-                               int binKeep = binInfo[name1];
-                               int binRemove = binInfo[name2];
+                               map<string, int>::iterator itBin1 = binInfo.find(name1);
+                               map<string, int>::iterator itBin2 = binInfo.find(name2);
+                               
+                               if(itBin1 == binInfo.end()){  cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1);  }
+                               if(itBin2 == binInfo.end()){  cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1);  }
+
+                               int binKeep = itBin1->second;
+                               int binRemove = itBin2->second;
                                
                                //if not merge bins and update binInfo
                                if(binKeep != binRemove) {
+               
                                        //save names in old bin
-                                       string names = list->get(binRemove);
+                                       string names = newList->get(binRemove);
                                        
                                        //merge bins into name1s bin
                                        newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));