git.donarmstrong.com Git - mothur.git/blobdiff - hcluster.cpp
added chimera.uchime
[mothur.git] / hcluster.cpp
index e1830cd8fbaacaf518f17b74a8822b475e485b53..8a596f3163c06dd55129cdc8de0f5779a5d6cb4d 100644 (file)
@@ -26,7 +26,7 @@ HCluster::HCluster(RAbundVector* rav, ListVector* lv, string ms, string d, NameA
                        clusterArray.push_back(temp);
                }
                
-               if (method != "average") {
+               if ((method == "furthest") || (method == "nearest")) {
                        m->openInputFile(distfile, filehandle);
                }else{  
                        processFile();  
@@ -273,7 +273,7 @@ bool HCluster::update(int row, int col, float distance){
                //you don't want to cluster with yourself
                if (smallRow != smallCol) {
                        
-                       if (method != "average") {
+                       if ((method == "furthest") || (method == "nearest")) {
                                //can we cluster???
                                if (method == "nearest") { cluster = true;  }
                                else{ //assume furthest
@@ -358,7 +358,7 @@ vector<seqDist> HCluster::getSeqs(){
        try {
                vector<seqDist> sameSeqs;
                
-               if(method != "average") {
+               if ((method == "furthest") || (method == "nearest")) {
                        sameSeqs = getSeqsFNNN();
                }else{
                        sameSeqs = getSeqsAN(); 
@@ -396,8 +396,8 @@ vector<seqDist> HCluster::getSeqsFNNN(){
                        
                        map<string,int>::iterator itA = nameMap->find(firstName);
                        map<string,int>::iterator itB = nameMap->find(secondName);
-                       if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
-                       if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
+                       if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
+                       if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }
                
                        //using cutoff
                        if (distance > cutoff) { break; }
@@ -517,7 +517,7 @@ int HCluster::combineFile() {
                //in = fopen(distfile.c_str(), "rb");
        
                ifstream in;
-               m->openInputFile(distfile, in);
+               m->openInputFile(distfile, in, "no error");
                
                int first, second;
                float dist;
@@ -638,9 +638,14 @@ int HCluster::combineFile() {
                        
                        float average;
                        if (it2Merge != smallRowColValues[1].end()) { //if yes, then average
-                               //weighted average
-                               int total = clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq;
-                               average = ((clusterArray[smallRow].numSeq * itMerge->second) + (clusterArray[smallCol].numSeq * it2Merge->second)) / (float) total;
+                               //average
+                               if (method == "average") {
+                                       int total = clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq;
+                                       average = ((clusterArray[smallRow].numSeq * itMerge->second) + (clusterArray[smallCol].numSeq * it2Merge->second)) / (float) total;
+                               }else { //weighted
+                                       average = ((itMerge->second * 1.0) + (it2Merge->second * 1.0)) / (float) 2.0;                           
+                               }
+                               
                                smallRowColValues[1].erase(it2Merge);
                                
                                seqDist temp(clusterArray[smallRow].parent, itMerge->first, average);
@@ -741,7 +746,7 @@ int HCluster::processFile() {
                float distance;
                
                ifstream in;
-               m->openInputFile(distfile, in);
+               m->openInputFile(distfile, in, "no error");
                
                ofstream out;
                string outTemp = distfile + ".temp";
@@ -755,8 +760,8 @@ int HCluster::processFile() {
                        
                        map<string,int>::iterator itA = nameMap->find(firstName);
                        map<string,int>::iterator itB = nameMap->find(secondName);
-                       if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);  }
-                       if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
+                       if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
+                       if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }
                
                        //using cutoff
                        if (distance > cutoff) { break; }