]> git.donarmstrong.com Git - mothur.git/blob - readcluster.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / readcluster.cpp
1 /*
2  *  readcluster.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/28/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "readcluster.h"
11
12 /***********************************************************************/
13
14 ReadCluster::ReadCluster(string distfile, float c, string o, bool s){
15                 m = MothurOut::getInstance();
16         distFile = distfile;
17                 cutoff = c;
18                 outputDir = o;
19                 sortWanted = s;
20                 list = NULL;
21 }
22
23 /***********************************************************************/
24
25 int ReadCluster::read(NameAssignment*& nameMap){
26         try {
27         
28                 if (format == "phylip") { convertPhylip2Column(nameMap); }
29                 else { list = new ListVector(nameMap->getListVector());  }
30                 
31                 if (m->control_pressed) { return 0; }
32                 
33                 if (sortWanted) {  OutPutFile = m->sortFile(distFile, outputDir);  }
34                 else {  OutPutFile = distFile;   } //for use by clusters splitMatrix to convert a phylip matrix to column
35                 
36                 return 0;
37                         
38         }
39         catch(exception& e) {
40                 m->errorOut(e, "ReadCluster", "read");
41                 exit(1);
42         }
43 }
44 /***********************************************************************/
45 int ReadCluster::read(CountTable*& ct){
46         try {
47         
48                 if (format == "phylip") { convertPhylip2Column(ct); }
49                 else { list = new ListVector(ct->getListVector());  }
50                 
51                 if (m->control_pressed) { return 0; }
52                 
53                 if (sortWanted) {  OutPutFile = m->sortFile(distFile, outputDir);  }
54                 else {  OutPutFile = distFile;   } //for use by clusters splitMatrix to convert a phylip matrix to column
55                 
56                 return 0;
57         
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "ReadCluster", "read");
61                 exit(1);
62         }
63 }
64 /***********************************************************************/
65
66 int ReadCluster::convertPhylip2Column(NameAssignment*& nameMap){
67         try {   
68                 //convert phylip file to column file
69                 map<int, string> rowToName;
70                 map<int, string>::iterator it;
71                 
72                 ifstream in;
73                 ofstream out;
74                 string tempFile = distFile + ".column.temp";
75                 
76                 m->openInputFile(distFile, in);  m->gobble(in);
77                 m->openOutputFile(tempFile, out);
78                 
79                 float distance;
80                 int square, nseqs;
81                 string name;
82                 vector<string> matrixNames;
83                 
84                 string numTest;
85                 in >> numTest >> name;
86                 
87                 if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
88                 else { convert(numTest, nseqs); }
89                 
90                 rowToName[0] = name;
91                 matrixNames.push_back(name);
92                 
93                 if(nameMap == NULL){
94                         list = new ListVector(nseqs);
95                         list->set(0, name);
96                 }
97                 else{
98                         list = new ListVector(nameMap->getListVector());
99                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
100                 }
101         
102                 char d;
103                 while((d=in.get()) != EOF){
104                         
105                         if(isalnum(d)){
106                                 square = 1;
107                                 in.putback(d);
108                                 for(int i=0;i<nseqs;i++){
109                                         in >> distance;
110                                 }
111                                 break;
112                         }
113                         if(d == '\n'){
114                                 square = 0;
115                                 break;
116                         }
117                 }
118         
119                 if(square == 0){
120                                         
121                         for(int i=1;i<nseqs;i++){
122                                 in >> name;
123                                 rowToName[i] = name;
124                                 matrixNames.push_back(name);
125                                 
126                                 //there's A LOT of repeated code throughout this method...
127                                 if(nameMap == NULL){
128                                         list->set(i, name);
129                                         
130                                         for(int j=0;j<i;j++){
131                                         
132                                                 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
133                                                 
134                                                 in >> distance;
135                                                 
136                                                 if (distance == -1) { distance = 1000000; }
137                                                 
138                                                 if(distance < cutoff){
139                                                         out << i << '\t' << j << '\t' << distance << endl;
140                                                 }
141                                         }
142                                         
143                                 }
144                                 else{
145                                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
146                                         
147                                         for(int j=0;j<i;j++){
148                                                 
149                                                 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
150                                                 
151                                                 in >> distance;
152                                                 
153                                                 if (distance == -1) { distance = 1000000; }
154                                                 
155                                                 if(distance < cutoff){
156                                                         out << i << '\t' << j << '\t' << distance << endl;
157                                                 }
158                                         }
159                                 }
160                         }
161                 }
162                 else{
163                         for(int i=1;i<nseqs;i++){
164                                 in >> name;                
165                                 rowToName[i] = name;
166                                 matrixNames.push_back(name);
167                 
168                                 if(nameMap == NULL){
169                                         list->set(i, name);
170                                         for(int j=0;j<nseqs;j++){
171                                                 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
172                                                 
173                                                 in >> distance;
174                                         
175                                                 if (distance == -1) { distance = 1000000; }
176                                                 
177                                                 if(distance < cutoff && j < i){
178                                                         out << i << '\t' << j << '\t' << distance << endl;
179                                                 }
180                                         }
181                                 }
182                                 else{
183                                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
184                                         
185                                         for(int j=0;j<nseqs;j++){
186                                                 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
187                                                 
188                                                 in >> distance;
189                         
190                                                 if (distance == -1) { distance = 1000000; }
191                                                 
192                                                 if(distance < cutoff && j < i){
193                                                         out << i << '\t' << j << '\t' << distance << endl;
194                                                 }
195                                                 
196                                         }
197                                 }
198                         }
199                 }
200                 
201                 list->setLabel("0");
202                 in.close();
203                 out.close();
204         
205                 if(nameMap == NULL){
206                         nameMap = new NameAssignment();
207                         for(int i=0;i<matrixNames.size();i++){
208                                 nameMap->push_back(matrixNames[i]);
209                         }
210                 }
211                 
212         
213                 ifstream in2;
214                 ofstream out2;
215                 
216                 string outputFile = m->getRootName(distFile) + "column.dist";
217                 m->openInputFile(tempFile, in2);
218                 m->openOutputFile(outputFile, out2);
219                 
220                 int first, second;
221                 float dist;
222                 
223                 while (in2) {
224                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(tempFile); m->mothurRemove(outputFile); return 0; }
225                         
226                         in2 >> first >> second >> dist;
227                         out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
228                         m->gobble(in2);
229                 }
230                 in2.close();
231                 out2.close();
232                 
233                 m->mothurRemove(tempFile);
234                 distFile = outputFile;
235         
236                 if (m->control_pressed) {  m->mothurRemove(outputFile);  }
237
238                 return 0;
239         }
240         catch(exception& e) {
241                 m->errorOut(e, "ReadCluster", "convertPhylip2Column");
242                 exit(1);
243         }
244 }
245 /***********************************************************************/
246
247 int ReadCluster::convertPhylip2Column(CountTable*& ct){
248         try {   
249                 //convert phylip file to column file
250                 map<int, string> rowToName;
251                 map<int, string>::iterator it;
252                 
253                 ifstream in;
254                 ofstream out;
255                 string tempFile = distFile + ".column.temp";
256                 
257                 m->openInputFile(distFile, in);  m->gobble(in);
258                 m->openOutputFile(tempFile, out);
259                 
260                 float distance;
261                 int square, nseqs;
262                 string name;
263                 vector<string> matrixNames;
264                 
265                 string numTest;
266                 in >> numTest >> name;
267                 
268                 if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
269                 else { convert(numTest, nseqs); }
270                 
271                 rowToName[0] = name;
272                 matrixNames.push_back(name);
273                 
274                 if(ct == NULL){
275                         list = new ListVector(nseqs);
276                         list->set(0, name);
277                 }
278                 else{  list = new ListVector(ct->getListVector()); }
279         
280                 char d;
281                 while((d=in.get()) != EOF){
282                         
283                         if(isalnum(d)){
284                                 square = 1;
285                                 in.putback(d);
286                                 for(int i=0;i<nseqs;i++){
287                                         in >> distance;
288                                 }
289                                 break;
290                         }
291                         if(d == '\n'){
292                                 square = 0;
293                                 break;
294                         }
295                 }
296         
297                 if(square == 0){
298             
299                         for(int i=1;i<nseqs;i++){
300                                 in >> name;
301                                 rowToName[i] = name;
302                                 matrixNames.push_back(name);
303                                 
304                                 //there's A LOT of repeated code throughout this method...
305                                 if(ct == NULL){
306                                         list->set(i, name);
307                                         
308                                         for(int j=0;j<i;j++){
309                         
310                                                 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
311                                                 
312                                                 in >> distance;
313                                                 
314                                                 if (distance == -1) { distance = 1000000; }
315                                                 
316                                                 if(distance < cutoff){
317                                                         out << i << '\t' << j << '\t' << distance << endl;
318                                                 }
319                                         }
320                                         
321                                 }
322                                 else{
323                                         
324                                         for(int j=0;j<i;j++){
325                                                 
326                                                 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
327                                                 
328                                                 in >> distance;
329                                                 
330                                                 if (distance == -1) { distance = 1000000; }
331                                                 
332                                                 if(distance < cutoff){
333                                                         out << i << '\t' << j << '\t' << distance << endl;
334                                                 }
335                                         }
336                                 }
337                         }
338                 }
339                 else{
340                         for(int i=1;i<nseqs;i++){
341                                 in >> name;                
342                                 rowToName[i] = name;
343                                 matrixNames.push_back(name);
344                 
345                                 if(ct == NULL){
346                                         list->set(i, name);
347                                         for(int j=0;j<nseqs;j++){
348                                                 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
349                                                 
350                                                 in >> distance;
351                         
352                                                 if (distance == -1) { distance = 1000000; }
353                                                 
354                                                 if(distance < cutoff && j < i){
355                                                         out << i << '\t' << j << '\t' << distance << endl;
356                                                 }
357                                         }
358                                 }
359                                 else{
360                                         for(int j=0;j<nseqs;j++){
361                                                 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; }
362                                                 
363                                                 in >> distance;
364                         
365                                                 if (distance == -1) { distance = 1000000; }
366                                                 
367                                                 if(distance < cutoff && j < i){
368                                                         out << i << '\t' << j << '\t' << distance << endl;
369                                                 }
370                                                 
371                                         }
372                                 }
373                         }
374                 }
375                 
376                 list->setLabel("0");
377                 in.close();
378                 out.close();
379         
380                 if(ct == NULL){
381                         ct = new CountTable();
382                         for(int i=0;i<matrixNames.size();i++){
383                                 ct->push_back(matrixNames[i]);
384                         }
385                 }
386                 
387         
388                 ifstream in2;
389                 ofstream out2;
390                 
391                 string outputFile = m->getRootName(distFile) + "column.dist";
392                 m->openInputFile(tempFile, in2);
393                 m->openOutputFile(outputFile, out2);
394                 
395                 int first, second;
396                 float dist;
397                 
398                 while (in2) {
399                         if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(tempFile); m->mothurRemove(outputFile); return 0; }
400                         
401                         in2 >> first >> second >> dist;
402                         out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
403                         m->gobble(in2);
404                 }
405                 in2.close();
406                 out2.close();
407                 
408                 m->mothurRemove(tempFile);
409                 distFile = outputFile;
410         
411                 if (m->control_pressed) {  m->mothurRemove(outputFile);  }
412         
413                 return 0;
414         }
415         catch(exception& e) {
416                 m->errorOut(e, "ReadCluster", "convertPhylip2Column");
417                 exit(1);
418         }
419 }
420 /***********************************************************************/
421
422 ReadCluster::~ReadCluster(){}
423 /***********************************************************************/
424