]> git.donarmstrong.com Git - mothur.git/blob - readcluster.cpp
fixed phylip convert for cluster.split command
[mothur.git] / readcluster.cpp
1 /*
2  *  readcluster.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/28/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "readcluster.h"
11
12 /***********************************************************************/
13
14 ReadCluster::ReadCluster(string distfile, float c, string o, bool s){
15                 globaldata = GlobalData::getInstance();
16                 m = MothurOut::getInstance();
17         distFile = distfile;
18                 cutoff = c;
19                 outputDir = o;
20                 sortWanted = s;
21                 list = NULL;
22 }
23
24 /***********************************************************************/
25
26 int ReadCluster::read(NameAssignment* nameMap){
27         try {
28         
29                 if (format == "phylip") { convertPhylip2Column(nameMap); }
30                 else { list = new ListVector(nameMap->getListVector());  }
31                 
32                 if (m->control_pressed) { return 0; }
33                 
34                 if (sortWanted) {  OutPutFile = sortFile(distFile, outputDir);  }
35                 else {  OutPutFile = distFile;   } //for use by clusters splitMatrix to convert a phylip matrix to column
36                 
37                 return 0;
38                         
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "ReadCluster", "read");
42                 exit(1);
43         }
44 }
45 /***********************************************************************/
46
47 int ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
48         try {   
49                 //convert phylip file to column file
50                 map<int, string> rowToName;
51                 map<int, string>::iterator it;
52                 
53                 ifstream in;
54                 ofstream out;
55                 string tempFile = distFile + ".column.temp";
56                 
57                 openInputFile(distFile, in);  gobble(in);
58                 openOutputFile(tempFile, out);
59                 
60                 float distance;
61                 int square, nseqs;
62                 string name;
63                 vector<string> matrixNames;
64         
65                 in >> nseqs >> name;
66                 rowToName[0] = name;
67                 matrixNames.push_back(name);
68                 
69                 if(nameMap == NULL){
70                         list = new ListVector(nseqs);
71                         list->set(0, name);
72                 }
73                 else{
74                         list = new ListVector(nameMap->getListVector());
75                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
76                 }
77         
78                 char d;
79                 while((d=in.get()) != EOF){
80                         
81                         if(isalnum(d)){
82                                 square = 1;
83                                 in.putback(d);
84                                 for(int i=0;i<nseqs;i++){
85                                         in >> distance;
86                                 }
87                                 break;
88                         }
89                         if(d == '\n'){
90                                 square = 0;
91                                 break;
92                         }
93                 }
94         
95                 if(square == 0){
96                                         
97                         for(int i=1;i<nseqs;i++){
98                                 in >> name;
99                                 rowToName[i] = name;
100                                 matrixNames.push_back(name);
101                                 
102                                 //there's A LOT of repeated code throughout this method...
103                                 if(nameMap == NULL){
104                                         list->set(i, name);
105                                         
106                                         for(int j=0;j<i;j++){
107                                         
108                                                 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
109                                                 
110                                                 in >> distance;
111                                                 
112                                                 if (distance == -1) { distance = 1000000; }
113                                                 
114                                                 if(distance < cutoff){
115                                                         out << i << '\t' << j << '\t' << distance << endl;
116                                                 }
117                                         }
118                                         
119                                 }
120                                 else{
121                                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
122                                         
123                                         for(int j=0;j<i;j++){
124                                                 
125                                                 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
126                                                 
127                                                 in >> distance;
128                                                 
129                                                 if (distance == -1) { distance = 1000000; }
130                                                 
131                                                 if(distance < cutoff){
132                                                         out << i << '\t' << j << '\t' << distance << endl;
133                                                 }
134                                         }
135                                 }
136                         }
137                 }
138                 else{
139                         for(int i=1;i<nseqs;i++){
140                                 in >> name;                
141                                 rowToName[i] = name;
142                                 matrixNames.push_back(name);
143                 
144                                 if(nameMap == NULL){
145                                         list->set(i, name);
146                                         for(int j=0;j<nseqs;j++){
147                                                 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
148                                                 
149                                                 in >> distance;
150                                         
151                                                 if (distance == -1) { distance = 1000000; }
152                                                 
153                                                 if(distance < cutoff && j < i){
154                                                         out << i << '\t' << j << '\t' << distance << endl;
155                                                 }
156                                         }
157                                 }
158                                 else{
159                                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
160                                         
161                                         for(int j=0;j<nseqs;j++){
162                                                 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
163                                                 
164                                                 in >> distance;
165                         
166                                                 if (distance == -1) { distance = 1000000; }
167                                                 
168                                                 if(distance < cutoff && j < i){
169                                                         out << i << '\t' << j << '\t' << distance << endl;
170                                                 }
171                                                 
172                                         }
173                                 }
174                         }
175                 }
176                 
177                 list->setLabel("0");
178                 in.close();
179                 out.close();
180         
181                 if(nameMap == NULL){
182                         nameMap = new NameAssignment();
183                         for(int i=0;i<matrixNames.size();i++){
184                                 nameMap->push_back(matrixNames[i]);
185                         }
186                         globaldata->nameMap = nameMap;
187                 }
188                 
189         
190                 ifstream in2;
191                 ofstream out2;
192                 
193                 string outputFile = getRootName(distFile) + "column.dist";
194                 openInputFile(tempFile, in2);
195                 openOutputFile(outputFile, out2);
196                 
197                 int first, second;
198                 float dist;
199                 
200                 while (in2) {
201                         if (m->control_pressed) { in2.close(); out2.close(); remove(tempFile.c_str()); remove(outputFile.c_str()); return 0; }
202                         
203                         in2 >> first >> second >> dist;
204                         out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
205                         gobble(in2);
206                 }
207                 in2.close();
208                 out2.close();
209                 
210                 remove(tempFile.c_str());
211                 distFile = outputFile;
212         
213                 if (m->control_pressed) {  remove(outputFile.c_str());  }
214
215                 return 0;
216         }
217         catch(exception& e) {
218                 m->errorOut(e, "ReadCluster", "convertPhylip2Column");
219                 exit(1);
220         }
221 }
222 /***********************************************************************/
223
224 ReadCluster::~ReadCluster(){}
225 /***********************************************************************/
226