git.donarmstrong.com Git - mothur.git/blob - readcluster.cpp
removed read.dist, read.otu, read.tree and globaldata. added current to defaults...
[mothur.git] / readcluster.cpp
1 /*
2  *  readcluster.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/28/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "readcluster.h"
11
12 /***********************************************************************/
13
14 ReadCluster::ReadCluster(string distfile, float c, string o, bool s){
15                 m = MothurOut::getInstance();
16         distFile = distfile;
17                 cutoff = c;
18                 outputDir = o;
19                 sortWanted = s;
20                 list = NULL;
21 }
22
23 /***********************************************************************/
24
25 int ReadCluster::read(NameAssignment* nameMap){
26         try {
27         
28                 if (format == "phylip") { convertPhylip2Column(nameMap); }
29                 else { list = new ListVector(nameMap->getListVector());  }
30                 
31                 if (m->control_pressed) { return 0; }
32                 
33                 if (sortWanted) {  OutPutFile = m->sortFile(distFile, outputDir);  }
34                 else {  OutPutFile = distFile;   } //for use by clusters splitMatrix to convert a phylip matrix to column
35                 
36                 return 0;
37                         
38         }
39         catch(exception& e) {
40                 m->errorOut(e, "ReadCluster", "read");
41                 exit(1);
42         }
43 }
44 /***********************************************************************/
45
46 int ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
47         try {   
48                 //convert phylip file to column file
49                 map<int, string> rowToName;
50                 map<int, string>::iterator it;
51                 
52                 ifstream in;
53                 ofstream out;
54                 string tempFile = distFile + ".column.temp";
55                 
56                 m->openInputFile(distFile, in);  m->gobble(in);
57                 m->openOutputFile(tempFile, out);
58                 
59                 float distance;
60                 int square, nseqs;
61                 string name;
62                 vector<string> matrixNames;
63         
64                 in >> nseqs >> name;
65                 rowToName[0] = name;
66                 matrixNames.push_back(name);
67                 
68                 if(nameMap == NULL){
69                         list = new ListVector(nseqs);
70                         list->set(0, name);
71                 }
72                 else{
73                         list = new ListVector(nameMap->getListVector());
74                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
75                 }
76         
77                 char d;
78                 while((d=in.get()) != EOF){
79                         
80                         if(isalnum(d)){
81                                 square = 1;
82                                 in.putback(d);
83                                 for(int i=0;i<nseqs;i++){
84                                         in >> distance;
85                                 }
86                                 break;
87                         }
88                         if(d == '\n'){
89                                 square = 0;
90                                 break;
91                         }
92                 }
93         
94                 if(square == 0){
95                                         
96                         for(int i=1;i<nseqs;i++){
97                                 in >> name;
98                                 rowToName[i] = name;
99                                 matrixNames.push_back(name);
100                                 
101                                 //there's A LOT of repeated code throughout this method...
102                                 if(nameMap == NULL){
103                                         list->set(i, name);
104                                         
105                                         for(int j=0;j<i;j++){
106                                         
107                                                 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
108                                                 
109                                                 in >> distance;
110                                                 
111                                                 if (distance == -1) { distance = 1000000; }
112                                                 
113                                                 if(distance < cutoff){
114                                                         out << i << '\t' << j << '\t' << distance << endl;
115                                                 }
116                                         }
117                                         
118                                 }
119                                 else{
120                                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
121                                         
122                                         for(int j=0;j<i;j++){
123                                                 
124                                                 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
125                                                 
126                                                 in >> distance;
127                                                 
128                                                 if (distance == -1) { distance = 1000000; }
129                                                 
130                                                 if(distance < cutoff){
131                                                         out << i << '\t' << j << '\t' << distance << endl;
132                                                 }
133                                         }
134                                 }
135                         }
136                 }
137                 else{
138                         for(int i=1;i<nseqs;i++){
139                                 in >> name;                
140                                 rowToName[i] = name;
141                                 matrixNames.push_back(name);
142                 
143                                 if(nameMap == NULL){
144                                         list->set(i, name);
145                                         for(int j=0;j<nseqs;j++){
146                                                 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
147                                                 
148                                                 in >> distance;
149                                         
150                                                 if (distance == -1) { distance = 1000000; }
151                                                 
152                                                 if(distance < cutoff && j < i){
153                                                         out << i << '\t' << j << '\t' << distance << endl;
154                                                 }
155                                         }
156                                 }
157                                 else{
158                                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
159                                         
160                                         for(int j=0;j<nseqs;j++){
161                                                 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
162                                                 
163                                                 in >> distance;
164                         
165                                                 if (distance == -1) { distance = 1000000; }
166                                                 
167                                                 if(distance < cutoff && j < i){
168                                                         out << i << '\t' << j << '\t' << distance << endl;
169                                                 }
170                                                 
171                                         }
172                                 }
173                         }
174                 }
175                 
176                 list->setLabel("0");
177                 in.close();
178                 out.close();
179         
180                 if(nameMap == NULL){
181                         nameMap = new NameAssignment();
182                         for(int i=0;i<matrixNames.size();i++){
183                                 nameMap->push_back(matrixNames[i]);
184                         }
185                 }
186                 
187         
188                 ifstream in2;
189                 ofstream out2;
190                 
191                 string outputFile = m->getRootName(distFile) + "column.dist";
192                 m->openInputFile(tempFile, in2);
193                 m->openOutputFile(outputFile, out2);
194                 
195                 int first, second;
196                 float dist;
197                 
198                 while (in2) {
199                         if (m->control_pressed) { in2.close(); out2.close(); remove(tempFile.c_str()); remove(outputFile.c_str()); return 0; }
200                         
201                         in2 >> first >> second >> dist;
202                         out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
203                         m->gobble(in2);
204                 }
205                 in2.close();
206                 out2.close();
207                 
208                 remove(tempFile.c_str());
209                 distFile = outputFile;
210         
211                 if (m->control_pressed) {  remove(outputFile.c_str());  }
212
213                 return 0;
214         }
215         catch(exception& e) {
216                 m->errorOut(e, "ReadCluster", "convertPhylip2Column");
217                 exit(1);
218         }
219 }
220 /***********************************************************************/
221
222 ReadCluster::~ReadCluster(){}
223 /***********************************************************************/
224