]> git.donarmstrong.com Git - mothur.git/blob - readcolumn.cpp
added hcluster command and fixed some bugs, namely one with smart distancing.
[mothur.git] / readcolumn.cpp
1 /*\r
2  *  readcolumn.cpp\r
3  *  Mothur\r
4  *\r
5  *  Created by Sarah Westcott on 4/21/09.\r
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.\r
7  *\r
8  */\r
9 \r
10 #include "readcolumn.h"\r
11 #include "progress.hpp"\r
12 \r
13 /***********************************************************************/\r
14 \r
15 ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){\r
16         \r
17         successOpen = openInputFile(distFile, fileHandle);\r
18         \r
19 }\r
20 \r
21 /***********************************************************************/\r
22 \r
23 void ReadColumnMatrix::read(NameAssignment* nameMap){\r
24         try {           \r
25 \r
26                 string firstName, secondName;\r
27                 float distance;\r
28                 int nseqs = nameMap->size();\r
29 \r
30                 list = new ListVector(nameMap->getListVector());\r
31         \r
32                 Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);\r
33 \r
34                 int lt = 1;\r
35                 int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose\r
36                 int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix\r
37 \r
38                 //need to see if this is a square or a triangular matrix...\r
39         \r
40                 while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...\r
41                 \r
42                         fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance\r
43         \r
44                         map<string,int>::iterator itA = nameMap->find(firstName);\r
45                         map<string,int>::iterator itB = nameMap->find(secondName);\r
46                         \r
47                         if(itA == nameMap->end()){\r
48                                 cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n";\r
49                         }\r
50                         if(itB == nameMap->end()){\r
51                                 cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n";\r
52                         }\r
53 \r
54                         if (distance == -1) { distance = 1000000; }\r
55                         \r
56                         if(distance < cutoff && itA != itB){\r
57                                 if(itA->second > itB->second){\r
58                                         PCell value(itA->second, itB->second, distance);\r
59                         \r
60                                         if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...\r
61                                                 refRow = itA->second;\r
62                                                 refCol = itB->second;\r
63                                                 D->addCell(value);\r
64                                         }\r
65                                         else if(refRow == itA->second && refCol == itB->second){\r
66                                                 lt = 0;\r
67                                         }\r
68                                         else{\r
69                                                 D->addCell(value);\r
70                                         }\r
71                                 }\r
72                                 else if(itA->second < itB->second){\r
73                                         PCell value(itB->second, itA->second, distance);\r
74                         \r
75                                         if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...\r
76                                                 refRow = itA->second;\r
77                                                 refCol = itB->second;\r
78                                                 D->addCell(value);\r
79                                         }\r
80                                         else if(refRow == itB->second && refCol == itA->second){\r
81                                                 lt = 0;\r
82                                         }\r
83                                         else{\r
84                                                 D->addCell(value);\r
85                                         }\r
86                                 }\r
87                                 reading->update(itA->second * nseqs);\r
88                         }\r
89                         gobble(fileHandle);\r
90                 }\r
91 \r
92                 if(lt == 0){  // oops, it was square\r
93                         fileHandle.close();  //let's start over\r
94                         D->clear();  //let's start over\r
95                    \r
96                         openInputFile(distFile, fileHandle);  //let's start over\r
97 \r
98                         while(fileHandle){\r
99                                 fileHandle >> firstName >> secondName >> distance;\r
100                 \r
101                                 map<string,int>::iterator itA = nameMap->find(firstName);\r
102                                 map<string,int>::iterator itB = nameMap->find(secondName);\r
103                                 \r
104                                 if(itA == nameMap->end()){\r
105                                         cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";\r
106                                 }\r
107                                 if(itB == nameMap->end()){\r
108                                         cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";\r
109                                 }\r
110                                 \r
111                                 if (distance == -1) { distance = 1000000; }\r
112                                 \r
113                                 if(distance < cutoff && itA->second > itB->second){\r
114                                         PCell value(itA->second, itB->second, distance);\r
115                                         D->addCell(value);\r
116                                         reading->update(itA->second * nseqs);\r
117                                 }\r
118                 \r
119                                 gobble(fileHandle);\r
120                         }\r
121                 }\r
122 \r
123                 reading->finish();\r
124                 fileHandle.close();\r
125 \r
126                 list->setLabel("0");\r
127 \r
128         }\r
129         catch(exception& e) {\r
130                 errorOut(e, "ReadColumnMatrix", "read");\r
131                 exit(1);\r
132         }\r
133 }\r
134 \r
135 /***********************************************************************/\r
136 \r
137 ReadColumnMatrix::~ReadColumnMatrix(){\r
138         //delete D;\r
139         //delete list;\r
140 }\r
141 \r
142 \r