]> git.donarmstrong.com Git - mothur.git/blob - readcluster.cpp
precluster command finished
[mothur.git] / readcluster.cpp
1 /*
2  *  readcluster.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/28/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "readcluster.h"
11
12 /***********************************************************************/
13
14 ReadCluster::ReadCluster(string distfile, float c){
15                 globaldata = GlobalData::getInstance();
16         distFile = distfile;
17                 cutoff = c;
18 }
19
20 /***********************************************************************/
21
22 void ReadCluster::read(NameAssignment* nameMap){
23         try {
24         
25                 if (format == "phylip") { convertPhylip2Column(nameMap); }
26                 else { list = new ListVector(nameMap->getListVector());  }
27                 
28                 OutPutFile = sortFile(distFile);
29                         
30         }
31         catch(exception& e) {
32                 errorOut(e, "ReadCluster", "read");
33                 exit(1);
34         }
35 }
36 /***********************************************************************/
37
38 void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){
39         try {   
40                 //convert phylip file to column file
41                 map<int, string> rowToName;
42                 map<int, string>::iterator it;
43                 
44                 ifstream in;
45                 ofstream out;
46                 string tempFile = distFile + ".column.temp";
47                 
48                 openInputFile(distFile, in);
49                 openOutputFile(tempFile, out);
50                 
51                 float distance;
52                 int square, nseqs;
53                 string name;
54                 vector<string> matrixNames;
55         
56                 in >> nseqs >> name;
57                 rowToName[0] = name;
58                 matrixNames.push_back(name);
59                 
60                 if(nameMap == NULL){
61                         list = new ListVector(nseqs);
62                         list->set(0, name);
63                 }
64                 else{
65                         list = new ListVector(nameMap->getListVector());
66                         if(nameMap->count(name)==0){        mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
67                 }
68         
69                 char d;
70                 while((d=in.get()) != EOF){
71                         
72                         if(isalnum(d)){
73                                 square = 1;
74                                 in.putback(d);
75                                 for(int i=0;i<nseqs;i++){
76                                         in >> distance;
77                                 }
78                                 break;
79                         }
80                         if(d == '\n'){
81                                 square = 0;
82                                 break;
83                         }
84                 }
85         
86                 if(square == 0){
87                                         
88                         for(int i=1;i<nseqs;i++){
89                                 in >> name;
90                                 rowToName[i] = name;
91                                 matrixNames.push_back(name);
92                                 
93                                 //there's A LOT of repeated code throughout this method...
94                                 if(nameMap == NULL){
95                                         list->set(i, name);
96                                         
97                                         for(int j=0;j<i;j++){
98                                                 in >> distance;
99                                                 
100                                                 if (distance == -1) { distance = 1000000; }
101                                                 
102                                                 if(distance < cutoff){
103                                                         out << i << '\t' << j << '\t' << distance << endl;
104                                                 }
105                                         }
106                                         
107                                 }
108                                 else{
109                                         if(nameMap->count(name)==0){        mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
110                                         
111                                         for(int j=0;j<i;j++){
112                                                 in >> distance;
113                                                 
114                                                 if (distance == -1) { distance = 1000000; }
115                                                 
116                                                 if(distance < cutoff){
117                                                         out << i << '\t' << j << '\t' << distance << endl;
118                                                 }
119                                         }
120                                 }
121                         }
122                 }
123                 else{
124                         for(int i=1;i<nseqs;i++){
125                                 in >> name;                
126                                 rowToName[i] = name;
127                                 matrixNames.push_back(name);
128                 
129                                 if(nameMap == NULL){
130                                         list->set(i, name);
131                                         for(int j=0;j<nseqs;j++){
132                                                 in >> distance;
133                                         
134                                                 if (distance == -1) { distance = 1000000; }
135                                                 
136                                                 if(distance < cutoff && j < i){
137                                                         out << i << '\t' << j << '\t' << distance << endl;
138                                                 }
139                                         }
140                                 }
141                                 else{
142                                         if(nameMap->count(name)==0){        mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); }
143                                         
144                                         for(int j=0;j<nseqs;j++){
145                                                 in >> distance;
146                         
147                                                 if (distance == -1) { distance = 1000000; }
148                                                 
149                                                 if(distance < cutoff && j < i){
150                                                         out << i << '\t' << j << '\t' << distance << endl;
151                                                 }
152                                                 
153                                         }
154                                 }
155                         }
156                 }
157                 
158                 list->setLabel("0");
159                 in.close();
160                 out.close();
161                 
162                 if(nameMap == NULL){
163                         nameMap = new NameAssignment();
164                         for(int i=0;i<matrixNames.size();i++){
165                                 nameMap->push_back(matrixNames[i]);
166                         }
167                         globaldata->nameMap = nameMap;
168                 }
169                 
170         
171                 ifstream in2;
172                 ofstream out2;
173                 
174                 string outputFile = getRootName(distFile) + "column.dist";
175                 openInputFile(tempFile, in2);
176                 openOutputFile(outputFile, out2);
177                 
178                 int first, second;
179                 float dist;
180                 
181                 while (in2) {
182                         in2 >> first >> second >> dist;
183                         out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl;
184                         gobble(in2);
185                 }
186                 in2.close();
187                 out2.close();
188                 
189                 remove(tempFile.c_str());
190                 distFile = outputFile;
191         }
192         catch(exception& e) {
193                 errorOut(e, "ReadCluster", "convertPhylip2Column");
194                 exit(1);
195         }
196 }
197 /***********************************************************************/
198
199 ReadCluster::~ReadCluster(){}
200 /***********************************************************************/
201