5 * Created by westcott on 10/13/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
11 #include "rabundvector.hpp"
12 #include "listvector.hpp"
13 #include "sparsematrix.hpp"
16 /***********************************************************************/
18 HCluster::HCluster(RAbundVector* rav, ListVector* lv, string m) : rabund(rav), list(lv), method(m){
22 numSeqs = list->getNumSeqs();
24 //initialize cluster array
25 for (int i = 0; i < numSeqs; i++) {
26 clusterNode temp(1, -1, i);
27 clusterArray.push_back(temp);
32 errorOut(e, "HCluster", "HCluster");
36 /***********************************************************************/
38 void HCluster::clusterBins(){
40 //cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild);
42 rabund->set(clusterArray[smallCol].smallChild, rabund->get(clusterArray[smallRow].smallChild)+rabund->get(clusterArray[smallCol].smallChild));
43 rabund->set(clusterArray[smallRow].smallChild, 0);
44 rabund->setLabel(toString(smallDist));
46 //cout << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild) << endl;
49 errorOut(e, "HCluster", "clusterBins");
56 /***********************************************************************/
58 void HCluster::clusterNames(){
60 ///cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(clusterArray[smallRow].smallChild) << '\t' << list->get(clusterArray[smallCol].smallChild);
61 if (mapWanted) { updateMap(); }
63 list->set(clusterArray[smallCol].smallChild, list->get(clusterArray[smallRow].smallChild)+','+list->get(clusterArray[smallCol].smallChild));
64 list->set(clusterArray[smallRow].smallChild, "");
65 list->setLabel(toString(smallDist));
67 //cout << '\t' << list->get(clusterArray[smallRow].smallChild) << '\t' << list->get(clusterArray[smallCol].smallChild) << endl;
71 errorOut(e, "HCluster", "clusterNames");
76 /***********************************************************************/
77 int HCluster::getUpmostParent(int node){
80 while (clusterArray[node].parent != -1) {
81 node = clusterArray[node].parent;
87 errorOut(e, "HCluster", "getUpmostParent");
91 /***********************************************************************/
92 void HCluster::printInfo(){
95 cout << "link table" << endl;
96 for (it = activeLinks.begin(); it!= activeLinks.end(); it++) {
97 cout << it->first << " = " << it->second << endl;
100 for (int i = 0; i < linkTable.size(); i++) {
102 for (it = linkTable[i].begin(); it != linkTable[i].end(); it++) {
103 cout << it->first << '-' << it->second << '\t';
107 cout << endl << "clusterArray" << endl;
109 for (int i = 0; i < clusterArray.size(); i++) {
110 cout << i << '\t' << clusterArray[i].numSeq << '\t' << clusterArray[i].parent << '\t' << clusterArray[i].smallChild << endl;
116 catch(exception& e) {
117 errorOut(e, "HCluster", "getUpmostParent");
121 /***********************************************************************/
122 int HCluster::makeActive() {
126 //cout << "active - here" << endl;
127 it = activeLinks.find(smallRow);
128 it2 = activeLinks.find(smallCol);
130 if ((it == activeLinks.end()) && (it2 == activeLinks.end())) { //both are not active so add them
131 int size = linkTable.size();
132 map<int, int> temp; map<int, int> temp2;
134 //add link to eachother
135 temp[smallRow] = 1; // 1 2
136 temp2[smallCol] = 1; // 1 0 1
138 linkTable.push_back(temp);
139 linkTable.push_back(temp2);
142 activeLinks[smallRow] = size;
143 activeLinks[smallCol] = size+1;
144 //cout << "active - here1" << endl;
145 }else if ((it != activeLinks.end()) && (it2 == activeLinks.end())) { //smallRow is active, smallCol is not
146 int size = linkTable.size();
147 int alreadyActiveRow = it->second;
150 //add link to eachother
151 temp[smallRow] = 1; // 6 2 3 5
152 linkTable.push_back(temp); // 6 0 1 2 0
153 linkTable[alreadyActiveRow][smallCol] = 1; // 2 1 0 1 1
157 activeLinks[smallCol] = size;
158 //cout << "active - here2" << endl;
159 }else if ((it == activeLinks.end()) && (it2 != activeLinks.end())) { //smallCol is active, smallRow is not
160 int size = linkTable.size();
161 int alreadyActiveCol = it2->second;
164 //add link to eachother
165 temp[smallCol] = 1; // 6 2 3 5
166 linkTable.push_back(temp); // 6 0 1 2 0
167 linkTable[alreadyActiveCol][smallRow] = 1; // 2 1 0 1 1
171 activeLinks[smallRow] = size;
172 //cout << "active - here3" << endl;
173 }else { //both are active so add one
174 int row = it->second;
175 int col = it2->second;
176 //cout << row << '\t' << col << endl;
178 linkTable[row][smallCol]++;
179 linkTable[col][smallRow]++;
180 linkValue = linkTable[row][smallCol];
181 //cout << "active - here4" << endl;
186 catch(exception& e) {
187 errorOut(e, "HCluster", "makeActive");
191 /***********************************************************************/
192 void HCluster::updateArrayandLinkTable() {
194 //if cluster was made update clusterArray and linkTable
195 int size = clusterArray.size();
198 clusterNode temp(clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq, -1, clusterArray[smallCol].smallChild);
199 clusterArray.push_back(temp);
202 clusterArray[smallRow].parent = size;
203 clusterArray[smallCol].parent = size;
205 //update linkTable by merging clustered rows and columns
206 int rowSpot = activeLinks[smallRow];
207 int colSpot = activeLinks[smallCol];
208 //cout << "here" << endl;
210 for (int i = 0; i < linkTable.size(); i++) {
211 //check if they are in map
212 it = linkTable[i].find(smallRow);
213 it2 = linkTable[i].find(smallCol);
215 if ((it!=linkTable[i].end()) && (it2!=linkTable[i].end())) { //they are both there
216 linkTable[i][size] = linkTable[i][smallRow]+linkTable[i][smallCol];
217 linkTable[i].erase(smallCol); //delete col row
218 linkTable[i].erase(smallRow); //delete col row
219 }else if ((it==linkTable[i].end()) && (it2!=linkTable[i].end())) { //only col
220 linkTable[i][size] = linkTable[i][smallCol];
221 linkTable[i].erase(smallCol); //delete col
222 }else if ((it!=linkTable[i].end()) && (it2==linkTable[i].end())) { //only row
223 linkTable[i][size] = linkTable[i][smallRow];
224 linkTable[i].erase(smallRow); //delete col
228 //cout << "here2" << endl;
230 for (it = linkTable[rowSpot].begin(); it != linkTable[rowSpot].end(); it++) {
231 it2 = linkTable[colSpot].find(it->first); //does the col also have this
233 if (it2 == linkTable[colSpot].end()) { //not there so add it
234 linkTable[colSpot][it->first] = it->second;
236 linkTable[colSpot][it->first] = it->second+it2->second;
239 //cout << "here3" << endl;
240 linkTable[colSpot].erase(size);
241 linkTable.erase(linkTable.begin()+rowSpot); //delete row
244 activeLinks.erase(smallRow);
245 activeLinks.erase(smallCol);
246 activeLinks[size] = colSpot;
248 //adjust everybody elses spot since you deleted - time vs. space
249 for (it = activeLinks.begin(); it != activeLinks.end(); it++) {
250 if (it->second > rowSpot) { activeLinks[it->first]--; }
253 //cout << "here4" << endl;
256 catch(exception& e) {
257 errorOut(e, "HCluster", "updateArrayandLinkTable");
261 /***********************************************************************/
262 void HCluster::update(int row, int col, float distance){
267 smallDist = distance;
269 //find upmost parent of row and col
270 smallRow = getUpmostParent(smallRow);
271 smallCol = getUpmostParent(smallCol);
272 //cout << "row = " << row << " smallRow = " << smallRow << " col = " << col << " smallCol = " << smallCol << " dist = " << distance << endl;
273 //you don't want to cluster with yourself
274 if (smallRow != smallCol) {
275 //are they active in the link table
276 int linkValue = makeActive(); //after this point this nodes info is active in linkTable
278 //cout << "linkValue = " << linkValue << " times = " << (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq) << endl;
280 bool cluster = false;
282 if (method == "nearest") { cluster = true; }
283 else if (method == "average") { cout << "still working on this... " << endl; //got to figure this out
284 }else{ //assume furthest
285 if (linkValue == (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq)) { cluster = true; }
289 updateArrayandLinkTable();
296 catch(exception& e) {
297 errorOut(e, "HCluster", "update");
301 /***********************************************************************/
302 void HCluster::setMapWanted(bool m) {
307 for (int i = 0; i < list->getNumBins(); i++) {
310 string names = list->get(i);
311 while (names.find_first_of(',') != -1) {
313 string name = names.substr(0,names.find_first_of(','));
314 //save name and bin number
316 names = names.substr(names.find_first_of(',')+1, names.length());
324 catch(exception& e) {
325 errorOut(e, "HCluster", "setMapWanted");
329 /***********************************************************************/
330 void HCluster::updateMap() {
332 //update location of seqs in smallRow since they move to smallCol now
333 string names = list->get(clusterArray[smallRow].smallChild);
334 while (names.find_first_of(',') != -1) {
336 string name = names.substr(0,names.find_first_of(','));
337 //save name and bin number
338 seq2Bin[name] = clusterArray[smallCol].smallChild;
339 names = names.substr(names.find_first_of(',')+1, names.length());
343 seq2Bin[names] = clusterArray[smallCol].smallChild;
345 catch(exception& e) {
346 errorOut(e, "HCluster", "updateMap");
350 //**********************************************************************************************************************
351 vector<seqDist> HCluster::getSeqs(ifstream& filehandle, NameAssignment* nameMap, float cutoff){
353 string firstName, secondName;
354 float distance, prevDistance;
355 vector<seqDist> sameSeqs;
358 //if you are not at the beginning of the file
360 sameSeqs.push_back(next);
361 prevDistance = next.dist;
366 while (!filehandle.eof()) {
368 filehandle >> firstName >> secondName >> distance; gobble(filehandle);
371 if (prevDistance == -1) { prevDistance = distance; }
373 map<string,int>::iterator itA = nameMap->find(firstName);
374 map<string,int>::iterator itB = nameMap->find(secondName);
375 if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
376 if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
379 if (distance > cutoff) { break; }
381 if (distance != -1) { //-1 means skip me
383 //are the distances the same
384 if (distance == prevDistance) { //save in vector
385 seqDist temp(itA->second, itB->second, distance);
386 sameSeqs.push_back(temp);
389 next.seq1 = itA->second;
390 next.seq2 = itB->second;
391 next.dist = distance;
398 //rndomize matching dists
399 random_shuffle(sameSeqs.begin(), sameSeqs.end());
403 catch(exception& e) {
404 errorOut(e, "HCluster", "getSeqs");
408 /***********************************************************************/