git.donarmstrong.com Git - mothur.git/blob - cluster.cpp
fixes while testing 1.33.0
[mothur.git] / cluster.cpp
1 /*
2  *  cluster.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 8/14/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "cluster.hpp"
11 #include "rabundvector.hpp"
12 #include "listvector.hpp"
13
14 /***********************************************************************/
15
16 Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f, float cs) :
17 rabund(rav), list(lv), dMatrix(dm), method(f), adjust(cs)
18 {
19         try {
20         
21         mapWanted = false;  //set to true by mgcluster to speed up overlap merge
22         
23         //save so you can modify as it changes in average neighbor
24         cutoff = c;
25         m = MothurOut::getInstance();
26         }
27         catch(exception& e) {
28                 m->errorOut(e, "Cluster", "Cluster");
29                 exit(1);
30         }
31 }
32 /***********************************************************************/
33 void Cluster::clusterBins(){
34         try {
35                 rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));     
36                 rabund->set(smallRow, 0);       
37                 rabund->setLabel(toString(smallDist));
38         }
39         catch(exception& e) {
40                 m->errorOut(e, "Cluster", "clusterBins");
41                 exit(1);
42         }
43 }
44 /***********************************************************************/
45
46 void Cluster::clusterNames(){
47         try {
48                 if (mapWanted) {  updateMap();  }
49                 
50                 list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
51                 list->set(smallRow, "");        
52                 list->setLabel(toString(smallDist));
53     }
54         catch(exception& e) {
55                 m->errorOut(e, "Cluster", "clusterNames");
56                 exit(1);
57         }
58 }
59 /***********************************************************************/
60 void Cluster::update(double& cutOFF){
61         try {
62         smallCol = dMatrix->getSmallestCell(smallRow);
63         nColCells = dMatrix->seqVec[smallCol].size();
64         nRowCells = dMatrix->seqVec[smallRow].size();
65         
66                 vector<int> foundCol(nColCells, 0);
67         //cout << dMatrix->getNNodes() << " small cell: " << smallRow << '\t' << smallCol << endl;
68                 int search;
69                 bool changed;
70         
71                 for (int i=nRowCells-1;i>=0;i--) {
72             if (m->control_pressed) { break; }
73              
74                         //if you are not the smallCell
75                         if (dMatrix->seqVec[smallRow][i].index != smallCol) { 
76                 search = dMatrix->seqVec[smallRow][i].index;
77                 
78                                 bool merged = false;
79                                 for (int j=0;j<nColCells;j++) {
80                     
81                                         if (dMatrix->seqVec[smallCol][j].index != smallRow) {  //if you are not the smallest distance
82                                                 if (dMatrix->seqVec[smallCol][j].index == search) {
83                                                         foundCol[j] = 1;
84                                                         merged = true;
85                                                         changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]);
86                             dMatrix->updateCellCompliment(smallCol, j);
87                                                         break;
88                                                 }else if (dMatrix->seqVec[smallCol][j].index < search) { //we don't have a distance for this cell
89                             if (adjust != -1.0) { //adjust
90                                 merged = true;
91                                 PDistCell value(search, adjust); //create a distance for the missing value
92                                 int location = dMatrix->addCellSorted(smallCol, value);
93                                 changed = updateDistance(dMatrix->seqVec[smallCol][location], dMatrix->seqVec[smallRow][i]);
94                                 dMatrix->updateCellCompliment(smallCol, location);
95                                 nColCells++;
96                                 foundCol.push_back(0); //add a new found column
97                                 //adjust value
98                                 for (int k = foundCol.size()-1; k > location; k--) { foundCol[k] = foundCol[k-1]; }
99                                 foundCol[location] = 1;
100                             }
101                             j+=nColCells;
102                         } 
103                                         }       
104                                 }
105                                 //if not merged it you need it for warning 
106                                 if ((!merged) && (method == "average" || method == "weighted")) {  
107                                         if (cutOFF > dMatrix->seqVec[smallRow][i].dist) {  
108                                                 cutOFF = dMatrix->seqVec[smallRow][i].dist;
109                         //cout << "changing cutoff to " << cutOFF << endl;
110                                         }
111                     
112                                 }
113                                 dMatrix->rmCell(smallRow, i);
114                         }
115                 }
116                 clusterBins();
117                 clusterNames();
118         
119                 // Special handling for singlelinkage case, not sure whether this
120                 // could be avoided
121                 for (int i=nColCells-1;i>=0;i--) {
122                         if (foundCol[i] == 0) {
123                 if (adjust != -1.0) { //adjust
124                     PDistCell value(smallCol, adjust); //create a distance for the missing value
125                     changed = updateDistance(dMatrix->seqVec[smallCol][i], value);
126                     dMatrix->updateCellCompliment(smallCol, i);
127                 }else {
128                     if (method == "average" || method == "weighted") {
129                         if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not hte smallest distance 
130                             if (cutOFF > dMatrix->seqVec[smallCol][i].dist) {  
131                                 cutOFF = dMatrix->seqVec[smallCol][i].dist;  
132                             }
133                         }
134                     }
135                 }
136                 dMatrix->rmCell(smallCol, i);
137                         }
138                 }
139         
140         }
141         catch(exception& e) {
142                 m->errorOut(e, "Cluster", "update");
143                 exit(1);
144         }
145 }
146 /***********************************************************************/
147 void Cluster::setMapWanted(bool f)  {  
148         try {
149                 mapWanted = f;
150                 
151         //initialize map
152                 for (int k = 0; k < list->getNumBins(); k++) {
153             
154             string names = list->get(k);
155             
156             //parse bin
157             string individual = "";
158             int binNameslength = names.size();
159             for(int j=0;j<binNameslength;j++){
160                 if(names[j] == ','){
161                     seq2Bin[individual] = k;
162                     individual = "";                            
163                 }
164                 else{  individual += names[j];  }
165             }
166             //get last name
167             seq2Bin[individual] = k;
168                 }
169                 
170         }
171         catch(exception& e) {
172                 m->errorOut(e, "Cluster", "setMapWanted");
173                 exit(1);
174         }
175 }
176 /***********************************************************************/
177 void Cluster::updateMap() {
178     try {
179                 //update location of seqs in smallRow since they move to smallCol now
180                 string names = list->get(smallRow);
181                 
182         string individual = "";
183         int binNameslength = names.size();
184         for(int j=0;j<binNameslength;j++){
185             if(names[j] == ','){
186                 seq2Bin[individual] = smallCol;
187                 individual = "";                                
188             }
189             else{  individual += names[j];  }
190         }
191         //get last name
192         seq2Bin[individual] = smallCol;         
193         
194         }
195         catch(exception& e) {
196                 m->errorOut(e, "Cluster", "updateMap");
197                 exit(1);
198         }
199 }
200 /***********************************************************************/
201
202
203