git.donarmstrong.com Git - mothur.git/blob - readcluster.cpp
Revert to previous commit
[mothur.git] / readcluster.cpp
1 /*
2  *  readcluster.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/28/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "readcluster.h"
11
12 /***********************************************************************/
13
14 ReadCluster::ReadCluster(string distfile, float c, string o, bool s){
15                 m = MothurOut::getInstance();
16         distFile = distfile;
17                 cutoff = c;
18                 outputDir = o;
19                 sortWanted = s;
20                 list = NULL;
21 }
22
23 /***********************************************************************/
24
25 int ReadCluster::read(NameAssignment*& nameMap){
26         try {
27         
28                 if (format == "phylip") { convertPhylip2Column(nameMap); }
29                 else { list = new ListVector(nameMap->getListVector());  }
30                 
31                 if (m->control_pressed) { return 0; }
32                 
33                 if (sortWanted) {  OutPutFile = m->sortFile(distFile, outputDir);  }
34                 else {  OutPutFile = distFile;   } //for use by clusters splitMatrix to convert a phylip matrix to column
35                 
36                 return 0;
37                         
38         }
39         catch(exception& e) {
40                 m->errorOut(e, "ReadCluster", "read");
41                 exit(1);
42         }
43 }
44 /***********************************************************************/
45
46 int ReadCluster::convertPhylip2Column(NameAssignment*& nameMap){
47         try {   
48                 //convert phylip file to column file
49                 map<int, string> rowToName;
50                 map<int, string>::iterator it;
51                 
52                 ifstream in;
53                 ofstream out;
54                 string tempFile = distFile + ".column.temp";
55                 
56                 m->openInputFile(distFile, in);  m->gobble(in);
57                 m->openOutputFile(tempFile, out);
58                 
59                 float distance;
60                 int square, nseqs;
61                 string name;
62                 vector<string> matrixNames;
63                 
64                 string numTest;
65                 in >> numTest >> name;
66                 
67                 if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
68                 else { convert(numTest, nseqs); }
69                 
70                 rowToName[0] = name;
71                 matrixNames.push_back(name);
72                 
73                 if(nameMap == NULL){
74                         list = new ListVector(nseqs);
75                         list->set(0, name);
76                 }
77                 else{
78                         list = new ListVector(nameMap->getListVector());
79                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
80                 }
81         
82                 char d;
83                 while((d=in.get()) != EOF){
84                         
85                         if(isalnum(d)){
86                                 square = 1;
87                                 in.putback(d);
88                                 for(int i=0;i<nseqs;i++){
89                                         in >> distance;
90                                 }
91                                 break;
92                         }
93                         if(d == '\n'){
94                                 square = 0;
95                                 break;
96                         }
97                 }
98         
99                 if(square == 0){
100                                         
101                         for(int i=1;i<nseqs;i++){
102                                 in >> name;
103                                 rowToName[i] = name;
104                                 matrixNames.push_back(name);
105                                 
106                                 //there's A LOT of repeated code throughout this method...
107                                 if(nameMap == NULL){
108                                         list->set(i, name);
109                                         
110                                         for(int j=0;j<i;j++){
111                                         
112                                                 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
113                                                 
114                                                 in >> distance;
115                                                 
116                                                 if (distance == -1) { distance = 1000000; }
117                                                 
118                                                 if(distance < cutoff){
119                                                         out << i << '\t' << j << '\t' << distance << endl;
120                                                 }
121                                         }
122                                         
123                                 }
124                                 else{
125                                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
126                                         
127                                         for(int j=0;j<i;j++){
128                                                 
129                                                 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
130                                                 
131                                                 in >> distance;
132                                                 
133                                                 if (distance == -1) { distance = 1000000; }
134                                                 
135                                                 if(distance < cutoff){
136                                                         out << i << '\t' << j << '\t' << distance << endl;
137                                                 }
138                                         }
139                                 }
140                         }
141                 }
142                 else{
143                         for(int i=1;i<nseqs;i++){
144                                 in >> name;                
145                                 rowToName[i] = name;
146                                 matrixNames.push_back(name);
147                 
148                                 if(nameMap == NULL){
149                                         list->set(i, name);
150                                         for(int j=0;j<nseqs;j++){
151                                                 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
152                                                 
153                                                 in >> distance;
154                                         
155                                                 if (distance == -1) { distance = 1000000; }
156                                                 
157                                                 if(distance < cutoff && j < i){
158                                                         out << i << '\t' << j << '\t' << distance << endl;
159                                                 }
160                                         }
161                                 }
162                                 else{
163                                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
164                                         
165                                         for(int j=0;j<nseqs;j++){
166                                                 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
167                                                 
168                                                 in >> distance;
169                         
170                                                 if (distance == -1) { distance = 1000000; }
171                                                 
172                                                 if(distance < cutoff && j < i){
173                                                         out << i << '\t' << j << '\t' << distance << endl;
174                                                 }
175                                                 
176                                         }
177                                 }
178                         }
179                 }
180                 
181                 list->setLabel("0");
182                 in.close();
183                 out.close();
184         
185                 if(nameMap == NULL){
186                         nameMap = new NameAssignment();
187                         for(int i=0;i<matrixNames.size();i++){
188                                 nameMap->push_back(matrixNames[i]);
189                         }
190                 }
191                 
192         
193                 ifstream in2;
194                 ofstream out2;
195                 
196                 string outputFile = m->getRootName(distFile) + "column.dist";
197                 m->openInputFile(tempFile, in2);
198                 m->openOutputFile(outputFile, out2);
199                 
200                 int first, second;
201                 float dist;
202                 
203                 while (in2) {
204                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(tempFile); m->mothurRemove(outputFile); return 0; }
205                         
206                         in2 >> first >> second >> dist;
207                         out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
208                         m->gobble(in2);
209                 }
210                 in2.close();
211                 out2.close();
212                 
213                 m->mothurRemove(tempFile);
214                 distFile = outputFile;
215         
216                 if (m->control_pressed) {  m->mothurRemove(outputFile);  }
217
218                 return 0;
219         }
220         catch(exception& e) {
221                 m->errorOut(e, "ReadCluster", "convertPhylip2Column");
222                 exit(1);
223         }
224 }
225 /***********************************************************************/
226
227 ReadCluster::~ReadCluster(){}
228 /***********************************************************************/
229