git.donarmstrong.com Git - mothur.git/blob - readcluster.cpp
some changes while testing 1.9
[mothur.git] / readcluster.cpp
1 /*
2  *  readcluster.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/28/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "readcluster.h"
11
12 /***********************************************************************/
13
14 ReadCluster::ReadCluster(string distfile, float c, string o){
15                 globaldata = GlobalData::getInstance();
16                 m = MothurOut::getInstance();
17         distFile = distfile;
18                 cutoff = c;
19                 outputDir = o;
20 }
21
22 /***********************************************************************/
23
24 int ReadCluster::read(NameAssignment* nameMap){
25         try {
26         
27                 if (format == "phylip") { convertPhylip2Column(nameMap); }
28                 else { list = new ListVector(nameMap->getListVector());  }
29                 
30                 if (m->control_pressed) { return 0; }
31                 
32                 OutPutFile = sortFile(distFile, outputDir);
33                 
34                 return 0;
35                         
36         }
37         catch(exception& e) {
38                 m->errorOut(e, "ReadCluster", "read");
39                 exit(1);
40         }
41 }
42 /***********************************************************************/
43
44 int ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
45         try {   
46                 //convert phylip file to column file
47                 map<int, string> rowToName;
48                 map<int, string>::iterator it;
49                 
50                 ifstream in;
51                 ofstream out;
52                 string tempFile = distFile + ".column.temp";
53                 
54                 openInputFile(distFile, in);
55                 openOutputFile(tempFile, out);
56                 
57                 float distance;
58                 int square, nseqs;
59                 string name;
60                 vector<string> matrixNames;
61         
62                 in >> nseqs >> name;
63                 rowToName[0] = name;
64                 matrixNames.push_back(name);
65                 
66                 if(nameMap == NULL){
67                         list = new ListVector(nseqs);
68                         list->set(0, name);
69                 }
70                 else{
71                         list = new ListVector(nameMap->getListVector());
72                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
73                 }
74         
75                 char d;
76                 while((d=in.get()) != EOF){
77                         
78                         if(isalnum(d)){
79                                 square = 1;
80                                 in.putback(d);
81                                 for(int i=0;i<nseqs;i++){
82                                         in >> distance;
83                                 }
84                                 break;
85                         }
86                         if(d == '\n'){
87                                 square = 0;
88                                 break;
89                         }
90                 }
91         
92                 if(square == 0){
93                                         
94                         for(int i=1;i<nseqs;i++){
95                                 in >> name;
96                                 rowToName[i] = name;
97                                 matrixNames.push_back(name);
98                                 
99                                 //there's A LOT of repeated code throughout this method...
100                                 if(nameMap == NULL){
101                                         list->set(i, name);
102                                         
103                                         for(int j=0;j<i;j++){
104                                         
105                                                 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
106                                                 
107                                                 in >> distance;
108                                                 
109                                                 if (distance == -1) { distance = 1000000; }
110                                                 
111                                                 if(distance < cutoff){
112                                                         out << i << '\t' << j << '\t' << distance << endl;
113                                                 }
114                                         }
115                                         
116                                 }
117                                 else{
118                                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
119                                         
120                                         for(int j=0;j<i;j++){
121                                                 
122                                                 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
123                                                 
124                                                 in >> distance;
125                                                 
126                                                 if (distance == -1) { distance = 1000000; }
127                                                 
128                                                 if(distance < cutoff){
129                                                         out << i << '\t' << j << '\t' << distance << endl;
130                                                 }
131                                         }
132                                 }
133                         }
134                 }
135                 else{
136                         for(int i=1;i<nseqs;i++){
137                                 in >> name;                
138                                 rowToName[i] = name;
139                                 matrixNames.push_back(name);
140                 
141                                 if(nameMap == NULL){
142                                         list->set(i, name);
143                                         for(int j=0;j<nseqs;j++){
144                                                 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
145                                                 
146                                                 in >> distance;
147                                         
148                                                 if (distance == -1) { distance = 1000000; }
149                                                 
150                                                 if(distance < cutoff && j < i){
151                                                         out << i << '\t' << j << '\t' << distance << endl;
152                                                 }
153                                         }
154                                 }
155                                 else{
156                                         if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
157                                         
158                                         for(int j=0;j<nseqs;j++){
159                                                 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); return 0; }
160                                                 
161                                                 in >> distance;
162                         
163                                                 if (distance == -1) { distance = 1000000; }
164                                                 
165                                                 if(distance < cutoff && j < i){
166                                                         out << i << '\t' << j << '\t' << distance << endl;
167                                                 }
168                                                 
169                                         }
170                                 }
171                         }
172                 }
173                 
174                 list->setLabel("0");
175                 in.close();
176                 out.close();
177                 
178                 if(nameMap == NULL){
179                         nameMap = new NameAssignment();
180                         for(int i=0;i<matrixNames.size();i++){
181                                 nameMap->push_back(matrixNames[i]);
182                         }
183                         globaldata->nameMap = nameMap;
184                 }
185                 
186         
187                 ifstream in2;
188                 ofstream out2;
189                 
190                 string outputFile = getRootName(distFile) + "column.dist";
191                 openInputFile(tempFile, in2);
192                 openOutputFile(outputFile, out2);
193                 
194                 int first, second;
195                 float dist;
196                 
197                 while (in2) {
198                         if (m->control_pressed) { in2.close(); out2.close(); remove(tempFile.c_str()); remove(outputFile.c_str()); return 0; }
199                         
200                         in2 >> first >> second >> dist;
201                         out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
202                         gobble(in2);
203                 }
204                 in2.close();
205                 out2.close();
206                 
207                 remove(tempFile.c_str());
208                 distFile = outputFile;
209                 
210                 if (m->control_pressed) {  remove(outputFile.c_str());  }
211
212                 return 0;
213         }
214         catch(exception& e) {
215                 m->errorOut(e, "ReadCluster", "convertPhylip2Column");
216                 exit(1);
217         }
218 }
219 /***********************************************************************/
220
221 ReadCluster::~ReadCluster(){}
222 /***********************************************************************/
223