git.donarmstrong.com Git - mothur.git/blobdiff - hcluster.cpp
added diffs and percent parameters to cluster.fragments command
[mothur.git] / hcluster.cpp
index 07deaa5991c94627fa2e02a88e3b10b2729e290e..88cba6ecce59b98c874a119c84c494aa1e152156 100644 (file)
@@ -26,8 +26,8 @@ HCluster::HCluster(RAbundVector* rav, ListVector* lv, string ms, string d, NameA
                        clusterArray.push_back(temp);
                }
                
-               if (method != "average") {
-                       openInputFile(distfile, filehandle);
+               if ((method == "furthest") || (method == "nearest")) {
+                       m->openInputFile(distfile, filehandle);
                }else{  
                        processFile();  
                }
@@ -273,7 +273,7 @@ bool HCluster::update(int row, int col, float distance){
                //you don't want to cluster with yourself
                if (smallRow != smallCol) {
                        
-                       if (method != "average") {
+                       if ((method == "furthest") || (method == "nearest")) {
                                //can we cluster???
                                if (method == "nearest") { cluster = true;  }
                                else{ //assume furthest
@@ -358,7 +358,7 @@ vector<seqDist> HCluster::getSeqs(){
        try {
                vector<seqDist> sameSeqs;
                
-               if(method != "average") {
+               if ((method == "furthest") || (method == "nearest")) {
                        sameSeqs = getSeqsFNNN();
                }else{
                        sameSeqs = getSeqsAN(); 
@@ -389,7 +389,7 @@ vector<seqDist> HCluster::getSeqsFNNN(){
                //get entry
                while (!filehandle.eof()) {
                        
-                       filehandle >> firstName >> secondName >> distance;    gobble(filehandle); 
+                       filehandle >> firstName >> secondName >> distance;    m->gobble(filehandle); 
        
                        //save first one
                        if (prevDistance == -1) { prevDistance = distance; }
@@ -438,7 +438,7 @@ vector<seqDist> HCluster::getSeqsAN(){
                vector<seqDist> sameSeqs;
                prevDistance = -1;
                
-               openInputFile(distfile, filehandle, "no error"); 
+               m->openInputFile(distfile, filehandle, "no error"); 
                
                //is the smallest value in mergedMin or the distfile?
                float mergedMinDist = 10000;
@@ -446,13 +446,13 @@ vector<seqDist> HCluster::getSeqsAN(){
                if (mergedMin.size() > 0) { mergedMinDist = mergedMin[0].dist;  }
                        
                if (!filehandle.eof()) {  
-                       filehandle >> firstName >> secondName >> distance;    gobble(filehandle);
+                       filehandle >> firstName >> secondName >> distance;    m->gobble(filehandle);
                        //save first one
                        if (prevDistance == -1) { prevDistance = distance; } 
                        if (distance != -1) { //-1 means skip me
                                seqDist temp(firstName, secondName, distance);
                                sameSeqs.push_back(temp);
-                       }
+                       }else{ distance = 10000; }
                }
                
                if (mergedMinDist < distance) { //get minimum distance from mergedMin
@@ -469,7 +469,7 @@ vector<seqDist> HCluster::getSeqsAN(){
                        //get entry
                        while (!filehandle.eof()) {
                                
-                               filehandle >> firstName >> secondName >> distance;    gobble(filehandle); 
+                               filehandle >> firstName >> secondName >> distance;    m->gobble(filehandle); 
                                
                                if (prevDistance == -1) { prevDistance = distance; }
                                
@@ -511,13 +511,13 @@ int HCluster::combineFile() {
                
                string tempDistFile = distfile + ".temp";
                ofstream out;
-               openOutputFile(tempDistFile, out);
+               m->openOutputFile(tempDistFile, out);
                
                //FILE* in;
                //in = fopen(distfile.c_str(), "rb");
        
                ifstream in;
-               openInputFile(distfile, in);
+               m->openInputFile(distfile, in, "no error");
                
                int first, second;
                float dist;
@@ -550,7 +550,7 @@ int HCluster::combineFile() {
                           //since file is sorted and mergedMin is sorted 
                           //you can put the smallest distance from each through the code below and keep the file sorted
                           
-                          in >> first >> second >> dist; gobble(in);
+                          in >> first >> second >> dist; m->gobble(in);
                           
                           if (m->control_pressed) { in.close(); out.close(); remove(tempDistFile.c_str()); return 0; }
                           
@@ -638,9 +638,14 @@ int HCluster::combineFile() {
                        
                        float average;
                        if (it2Merge != smallRowColValues[1].end()) { //if yes, then average
-                               //weighted average
-                               int total = clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq;
-                               average = ((clusterArray[smallRow].numSeq * itMerge->second) + (clusterArray[smallCol].numSeq * it2Merge->second)) / (float) total;
+                               //average
+                               if (method == "average") {
+                                       int total = clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq;
+                                       average = ((clusterArray[smallRow].numSeq * itMerge->second) + (clusterArray[smallCol].numSeq * it2Merge->second)) / (float) total;
+                               }else { //weighted
+                                       average = ((itMerge->second * 1.0) + (it2Merge->second * 1.0)) / (float) 2.0;                           
+                               }
+                               
                                smallRowColValues[1].erase(it2Merge);
                                
                                seqDist temp(clusterArray[smallRow].parent, itMerge->first, average);
@@ -688,7 +693,7 @@ seqDist HCluster::getNextDist(char* buffer, int& index, int size){
                        if ((buffer[index] == 10) || (buffer[index] == 13)) { //newline in unix or windows
                                gotDist = true;
                                
-                               //gobble space
+                               //m->gobble space
                                while (index < size) {          
                                        if (isspace(buffer[index])) { index++; }
                                        else { break; }         
@@ -741,17 +746,17 @@ int HCluster::processFile() {
                float distance;
                
                ifstream in;
-               openInputFile(distfile, in);
+               m->openInputFile(distfile, in, "no error");
                
                ofstream out;
                string outTemp = distfile + ".temp";
-               openOutputFile(outTemp, out);
+               m->openOutputFile(outTemp, out);
        
                //get entry
                while (!in.eof()) {
                        if (m->control_pressed) { in.close(); out.close(); remove(outTemp.c_str()); return 0; }
                        
-                       in >> firstName >> secondName >> distance;    gobble(in);               
+                       in >> firstName >> secondName >> distance;    m->gobble(in);            
                        
                        map<string,int>::iterator itA = nameMap->find(firstName);
                        map<string,int>::iterator itB = nameMap->find(secondName);